Spaces:
Sleeping
Sleeping
some consolidation
Browse files- api_utils.py +2 -2
- category_matching.py +1 -1
- ui.py +54 -101
- ui_expanded_matching.py +50 -268
- ui_formatters.py +241 -122
- ui_hybrid_matching.py +50 -36
api_utils.py
CHANGED
@@ -112,7 +112,7 @@ def openai_structured_query(
|
|
112 |
prompt: str,
|
113 |
system_message: str = "You are a helpful assistant.",
|
114 |
schema: dict = None,
|
115 |
-
model: str = "
|
116 |
client=None,
|
117 |
schema_name: str = "structured_output"
|
118 |
) -> dict:
|
@@ -233,7 +233,7 @@ def rank_ingredients_openai(
|
|
233 |
# Make the API call directly for more control
|
234 |
response = client.responses.create(
|
235 |
model=model,
|
236 |
-
reasoning={"effort": "low"},
|
237 |
input=[
|
238 |
{"role": "system", "content": f"You are a food ingredient matching expert. Rank the top {max_results} ingredient based on how well they match the given product. Only include ingredients with relevance score >= {confidence_threshold}."},
|
239 |
{"role": "user", "content": prompt}
|
|
|
112 |
prompt: str,
|
113 |
system_message: str = "You are a helpful assistant.",
|
114 |
schema: dict = None,
|
115 |
+
model: str = "gpt-4o-mini",
|
116 |
client=None,
|
117 |
schema_name: str = "structured_output"
|
118 |
) -> dict:
|
|
|
233 |
# Make the API call directly for more control
|
234 |
response = client.responses.create(
|
235 |
model=model,
|
236 |
+
# reasoning={"effort": "low"},
|
237 |
input=[
|
238 |
{"role": "system", "content": f"You are a food ingredient matching expert. Rank the top {max_results} ingredient based on how well they match the given product. Only include ingredients with relevance score >= {confidence_threshold}."},
|
239 |
{"role": "user", "content": prompt}
|
category_matching.py
CHANGED
@@ -220,7 +220,7 @@ def hybrid_category_matching(products: List[str], categories: Dict[str, str],
|
|
220 |
|
221 |
# Extract just the category descriptions for re-ranking
|
222 |
candidate_ids = [c[0] for c in candidates]
|
223 |
-
candidate_texts = [f"
|
224 |
|
225 |
try:
|
226 |
# Apply re-ranking to the candidates
|
|
|
220 |
|
221 |
# Extract just the category descriptions for re-ranking
|
222 |
candidate_ids = [c[0] for c in candidates]
|
223 |
+
candidate_texts = [f"{c[1]}" for c in candidates]
|
224 |
|
225 |
try:
|
226 |
# Apply re-ranking to the candidates
|
ui.py
CHANGED
@@ -1,16 +1,12 @@
|
|
1 |
import gradio as gr
|
2 |
from comparison import compare_ingredient_methods_ui
|
3 |
|
4 |
-
# Import from our
|
5 |
from ui_core import embeddings, get_css, load_examples
|
6 |
from ui_ingredient_matching import categorize_products
|
7 |
from ui_category_matching import categorize_products_by_category
|
8 |
-
|
9 |
from ui_hybrid_matching import categorize_products_with_voyage_reranking
|
10 |
-
|
11 |
-
|
12 |
from ui_expanded_matching import categorize_products_with_openai_reranking
|
13 |
-
from ui_formatters import get_formatted_css
|
14 |
|
15 |
def create_demo():
|
16 |
"""Create the Gradio interface"""
|
@@ -19,7 +15,7 @@ def create_demo():
|
|
19 |
|
20 |
with gr.Tabs() as tabs:
|
21 |
# Original Ingredient Matching Tab
|
22 |
-
with gr.TabItem("Ingredient
|
23 |
with gr.Row():
|
24 |
with gr.Column(scale=1):
|
25 |
# Input section
|
@@ -43,7 +39,7 @@ def create_demo():
|
|
43 |
|
44 |
|
45 |
# New Category Matching Tab
|
46 |
-
with gr.TabItem("Category
|
47 |
with gr.Row():
|
48 |
with gr.Column(scale=1):
|
49 |
# Input section
|
@@ -65,73 +61,59 @@ def create_demo():
|
|
65 |
# Results section
|
66 |
category_output = gr.HTML(label="Category Matching Results", elem_id="results-container")
|
67 |
|
68 |
-
#
|
69 |
-
|
70 |
-
with gr.
|
71 |
-
with gr.
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
-
with gr.
|
96 |
-
|
97 |
-
|
98 |
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
with gr.Column(scale=1):
|
107 |
-
# Input section
|
108 |
-
openai_text_input = gr.Textbox(
|
109 |
-
lines=10,
|
110 |
-
placeholder="Enter product names, one per line",
|
111 |
-
label="Product Names"
|
112 |
-
)
|
113 |
-
openai_input_controls = gr.Row()
|
114 |
-
with openai_input_controls:
|
115 |
-
openai_expansion_switch = gr.Checkbox(value=False, label="Use Description Expansion",
|
116 |
-
info="Expand product descriptions using AI before matching")
|
117 |
-
openai_top_n = gr.Slider(1, 20, 10, step=1, label="Top N Results")
|
118 |
-
openai_confidence = gr.Slider(0.1, 0.9, 0.5, label="Matching Threshold")
|
119 |
-
|
120 |
-
# Add toggle here for matching type
|
121 |
-
openai_match_type = gr.Radio(
|
122 |
-
choices=["ingredients", "categories"],
|
123 |
-
value="ingredients",
|
124 |
-
label="Match Type",
|
125 |
-
info="Choose whether to match against ingredients or categories"
|
126 |
-
)
|
127 |
-
|
128 |
-
with gr.Row():
|
129 |
-
openai_match_btn = gr.Button("Match with OpenAI Reranking", variant="primary")
|
130 |
-
openai_examples_btn = gr.Button("Load Examples")
|
131 |
|
132 |
-
|
133 |
-
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
|
136 |
# New Comparison Tab
|
137 |
with gr.TabItem("Compare Methods"):
|
@@ -196,37 +178,8 @@ def create_demo():
|
|
196 |
inputs=[category_text_input, gr.State(False), category_top_n, category_confidence],
|
197 |
outputs=[category_output],
|
198 |
)
|
199 |
-
|
200 |
-
|
201 |
-
# Connect buttons for Voyage reranking (previously hybrid matching)
|
202 |
-
voyage_match_btn.click(
|
203 |
-
fn=categorize_products_with_voyage_reranking, # New function to create
|
204 |
-
inputs=[voyage_text_input, gr.State(False), voyage_expansion_switch, voyage_embedding_top_n,
|
205 |
-
voyage_final_top_n, voyage_confidence, voyage_match_type],
|
206 |
-
outputs=[voyage_output],
|
207 |
-
)
|
208 |
-
|
209 |
-
voyage_examples_btn.click(
|
210 |
-
fn=load_examples,
|
211 |
-
inputs=[],
|
212 |
-
outputs=voyage_text_input
|
213 |
-
)
|
214 |
-
|
215 |
-
# Connect buttons for OpenAI reranking (previously expanded description matching)
|
216 |
-
openai_match_btn.click(
|
217 |
-
fn=categorize_products_with_openai_reranking, # New function to create
|
218 |
-
inputs=[openai_text_input, gr.State(False), openai_expansion_switch,
|
219 |
-
openai_top_n, openai_confidence, openai_match_type],
|
220 |
-
outputs=[openai_output],
|
221 |
-
)
|
222 |
-
|
223 |
-
openai_examples_btn.click(
|
224 |
-
fn=load_examples,
|
225 |
-
inputs=[],
|
226 |
-
outputs=openai_text_input
|
227 |
-
)
|
228 |
|
229 |
-
# Examples buttons
|
230 |
examples_btn.click(
|
231 |
fn=load_examples,
|
232 |
inputs=[],
|
|
|
1 |
import gradio as gr
|
2 |
from comparison import compare_ingredient_methods_ui
|
3 |
|
4 |
+
# Import from our UI modules
|
5 |
from ui_core import embeddings, get_css, load_examples
|
6 |
from ui_ingredient_matching import categorize_products
|
7 |
from ui_category_matching import categorize_products_by_category
|
|
|
8 |
from ui_hybrid_matching import categorize_products_with_voyage_reranking
|
|
|
|
|
9 |
from ui_expanded_matching import categorize_products_with_openai_reranking
|
|
|
10 |
|
11 |
def create_demo():
|
12 |
"""Create the Gradio interface"""
|
|
|
15 |
|
16 |
with gr.Tabs() as tabs:
|
17 |
# Original Ingredient Matching Tab
|
18 |
+
with gr.TabItem("Ingredient Embeddings"):
|
19 |
with gr.Row():
|
20 |
with gr.Column(scale=1):
|
21 |
# Input section
|
|
|
39 |
|
40 |
|
41 |
# New Category Matching Tab
|
42 |
+
with gr.TabItem("Category Embeddings"):
|
43 |
with gr.Row():
|
44 |
with gr.Column(scale=1):
|
45 |
# Input section
|
|
|
61 |
# Results section
|
62 |
category_output = gr.HTML(label="Category Matching Results", elem_id="results-container")
|
63 |
|
64 |
+
# Common function to create reranking UI tabs
|
65 |
+
def create_reranking_tab(tab_name, fn_name, default_match="ingredients"):
|
66 |
+
with gr.TabItem(tab_name):
|
67 |
+
with gr.Row():
|
68 |
+
with gr.Column(scale=1):
|
69 |
+
# Input section
|
70 |
+
tab_input = gr.Textbox(
|
71 |
+
lines=10,
|
72 |
+
placeholder="Enter product names, one per line",
|
73 |
+
label="Product Names"
|
74 |
+
)
|
75 |
+
with gr.Row():
|
76 |
+
tab_expansion = gr.Checkbox(
|
77 |
+
value=False,
|
78 |
+
label="Use Description Expansion",
|
79 |
+
info="Expand product descriptions using AI before matching"
|
80 |
+
)
|
81 |
+
tab_emb_top_n = gr.Slider(1, 50, 20, step=1, label="Embedding Top N Results")
|
82 |
+
tab_top_n = gr.Slider(1, 10, 5, step=1, label="Final Top N Results")
|
83 |
+
tab_confidence = gr.Slider(0.1, 0.9, 0.5, label="Matching Threshold")
|
84 |
+
|
85 |
+
tab_match_type = gr.Radio(
|
86 |
+
choices=["ingredients", "categories"],
|
87 |
+
value=default_match,
|
88 |
+
label="Match Type",
|
89 |
+
info="Choose whether to match against ingredients or categories"
|
90 |
+
)
|
91 |
+
|
92 |
+
with gr.Row():
|
93 |
+
tab_examples_btn = gr.Button("Load Examples", variant="secondary")
|
94 |
+
tab_match_btn = gr.Button(f"Match using {tab_name}", variant="primary")
|
95 |
|
96 |
+
with gr.Column(scale=1):
|
97 |
+
# Results section
|
98 |
+
tab_output = gr.HTML(label=f"{tab_name} Results", elem_id="results-container")
|
99 |
|
100 |
+
# Connect button events
|
101 |
+
tab_match_btn.click(
|
102 |
+
fn=fn_name,
|
103 |
+
inputs=[tab_input, gr.State(False), tab_expansion, tab_emb_top_n,
|
104 |
+
tab_top_n, tab_confidence, tab_match_type],
|
105 |
+
outputs=[tab_output],
|
106 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
+
tab_examples_btn.click(
|
109 |
+
fn=load_examples,
|
110 |
+
inputs=[],
|
111 |
+
outputs=tab_input
|
112 |
+
)
|
113 |
+
|
114 |
+
# Create the reranking tabs using the shared function
|
115 |
+
create_reranking_tab("Voyage AI Reranking", categorize_products_with_voyage_reranking, "categories")
|
116 |
+
create_reranking_tab("OpenAI Reranking", categorize_products_with_openai_reranking, "ingredients")
|
117 |
|
118 |
# New Comparison Tab
|
119 |
with gr.TabItem("Compare Methods"):
|
|
|
178 |
inputs=[category_text_input, gr.State(False), category_top_n, category_confidence],
|
179 |
outputs=[category_output],
|
180 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
+
# Examples buttons for the first two tabs
|
183 |
examples_btn.click(
|
184 |
fn=load_examples,
|
185 |
inputs=[],
|
ui_expanded_matching.py
CHANGED
@@ -4,248 +4,19 @@ from embeddings import create_product_embeddings
|
|
4 |
from similarity import compute_similarities
|
5 |
from openai_expansion import expand_product_descriptions
|
6 |
from ui_core import embeddings, parse_input, CATEGORY_EMBEDDINGS_PATH
|
7 |
-
from ui_formatters import
|
8 |
from api_utils import get_openai_client, process_in_parallel, rank_ingredients_openai, rank_categories_openai
|
9 |
from category_matching import load_categories, load_category_embeddings
|
10 |
import json
|
11 |
-
import os
|
12 |
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
Categorize products using expanded descriptions from OpenAI
|
17 |
-
|
18 |
-
Args:
|
19 |
-
product_input: Text input with product names
|
20 |
-
is_file: Whether the input is a file
|
21 |
-
top_n: Number of top results to show
|
22 |
-
confidence_threshold: Confidence threshold for matches
|
23 |
-
match_type: Either "ingredients" or "categories"
|
24 |
-
progress: Progress tracking object
|
25 |
-
|
26 |
-
Returns:
|
27 |
-
HTML formatted results
|
28 |
-
"""
|
29 |
-
progress_tracker = SafeProgress(progress)
|
30 |
-
progress_tracker(0, desc="Starting...")
|
31 |
-
|
32 |
-
# Parse input
|
33 |
-
product_names, error = parse_input(product_input, is_file)
|
34 |
-
if error:
|
35 |
-
return error
|
36 |
-
|
37 |
-
# Validate embeddings are loaded if doing ingredient matching
|
38 |
-
if match_type == "ingredients" and not embeddings:
|
39 |
-
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No ingredient embeddings loaded. Please check that the embeddings file exists and is properly formatted.</div>"
|
40 |
-
|
41 |
-
# Expand product descriptions
|
42 |
-
progress_tracker(0.2, desc="Expanding product descriptions...")
|
43 |
-
expanded_descriptions = expand_product_descriptions(product_names, progress=progress)
|
44 |
-
|
45 |
-
if not expanded_descriptions:
|
46 |
-
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: Failed to expand product descriptions. Please try again or check your OpenAI API key.</div>"
|
47 |
-
|
48 |
-
# Get shared OpenAI client
|
49 |
-
openai_client = get_openai_client()
|
50 |
-
|
51 |
-
if match_type == "ingredients":
|
52 |
-
# Generate product embeddings
|
53 |
-
progress_tracker(0.4, desc="Generating product embeddings...")
|
54 |
-
product_embeddings = create_product_embeddings(product_names, progress=progress)
|
55 |
-
|
56 |
-
# Compute embedding similarities for ingredients
|
57 |
-
progress_tracker(0.6, desc="Computing ingredient similarities...")
|
58 |
-
all_similarities = compute_similarities(embeddings, product_embeddings)
|
59 |
-
|
60 |
-
if not all_similarities:
|
61 |
-
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No similarities found. Please try different product names.</div>"
|
62 |
-
|
63 |
-
# Setup for OpenAI reranking
|
64 |
-
embedding_top_n = 20 # Number of candidates to consider from embeddings
|
65 |
-
|
66 |
-
progress_tracker(0.7, desc="Re-ranking with expanded descriptions...")
|
67 |
-
|
68 |
-
# Function for processing each product
|
69 |
-
def process_reranking(product):
|
70 |
-
if product not in all_similarities:
|
71 |
-
return product, []
|
72 |
-
|
73 |
-
candidates = all_similarities[product][:embedding_top_n]
|
74 |
-
if not candidates:
|
75 |
-
return product, []
|
76 |
-
|
77 |
-
candidate_ingredients = [c[0] for c in candidates]
|
78 |
-
expanded_text = expanded_descriptions.get(product, "")
|
79 |
-
|
80 |
-
try:
|
81 |
-
# Use the shared utility function
|
82 |
-
reranked_ingredients = rank_ingredients_openai(
|
83 |
-
product=product,
|
84 |
-
candidates=candidate_ingredients,
|
85 |
-
expanded_description=expanded_text,
|
86 |
-
client=openai_client,
|
87 |
-
model="o3-mini",
|
88 |
-
max_results=top_n,
|
89 |
-
confidence_threshold=confidence_threshold,
|
90 |
-
debug=True
|
91 |
-
)
|
92 |
-
|
93 |
-
return product, reranked_ingredients
|
94 |
-
|
95 |
-
except Exception as e:
|
96 |
-
print(f"Error reranking {product}: {e}")
|
97 |
-
# Fall back to top embedding match
|
98 |
-
return product, candidates[:1] if candidates[0][1] >= confidence_threshold else []
|
99 |
-
|
100 |
-
# Process all products in parallel
|
101 |
-
final_results = process_in_parallel(
|
102 |
-
items=product_names,
|
103 |
-
processor_func=process_reranking,
|
104 |
-
max_workers=min(10, len(product_names)),
|
105 |
-
progress_tracker=progress_tracker,
|
106 |
-
progress_start=0.7,
|
107 |
-
progress_end=0.9,
|
108 |
-
progress_desc="Re-ranking"
|
109 |
-
)
|
110 |
-
|
111 |
-
else: # categories
|
112 |
-
# Load category embeddings instead of JSON categories
|
113 |
-
progress_tracker(0.5, desc="Loading category embeddings...")
|
114 |
-
category_embeddings = load_category_embeddings()
|
115 |
-
|
116 |
-
if not category_embeddings:
|
117 |
-
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No category embeddings found. Please check that the embeddings file exists at data/category_embeddings.pickle.</div>"
|
118 |
-
|
119 |
-
# Generate product embeddings
|
120 |
-
progress_tracker(0.6, desc="Generating product embeddings...")
|
121 |
-
product_embeddings = create_product_embeddings(product_names, progress=progress)
|
122 |
-
|
123 |
-
# Compute embedding similarities for categories
|
124 |
-
progress_tracker(0.7, desc="Computing category similarities...")
|
125 |
-
all_similarities = compute_similarities(category_embeddings, product_embeddings)
|
126 |
-
print(f'All similarities: {all_similarities}')
|
127 |
-
if not all_similarities:
|
128 |
-
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No category similarities found. Please try different product names.</div>"
|
129 |
-
|
130 |
-
embedding_top_n = min(20, top_n * 2) # Number of candidates to consider from embeddings
|
131 |
-
|
132 |
-
# Collect all needed category IDs first
|
133 |
-
needed_category_ids = set()
|
134 |
-
for product, similarities in all_similarities.items():
|
135 |
-
for category_id, score in similarities[:embedding_top_n]:
|
136 |
-
if score >= confidence_threshold:
|
137 |
-
needed_category_ids.add(category_id)
|
138 |
-
|
139 |
-
# Load only the needed categories from JSON
|
140 |
-
progress_tracker(0.75, desc="Loading category descriptions...")
|
141 |
-
category_descriptions = {}
|
142 |
-
if needed_category_ids:
|
143 |
-
try:
|
144 |
-
with open("categories.json", 'r') as f:
|
145 |
-
categories_list = json.load(f)
|
146 |
-
for item in categories_list:
|
147 |
-
if item["id"] in needed_category_ids:
|
148 |
-
category_descriptions[item["id"]] = item["text"]
|
149 |
-
except Exception as e:
|
150 |
-
print(f"Error loading category descriptions: {e}")
|
151 |
-
|
152 |
-
# Function to process each product
|
153 |
-
def process_category_matching(product):
|
154 |
-
if product not in all_similarities:
|
155 |
-
return product, []
|
156 |
-
|
157 |
-
# candidates = all_similarities[product][:embedding_top_n]
|
158 |
-
candidates = all_similarities[product][:embedding_top_n]
|
159 |
-
print(f'candidates: {candidates}')
|
160 |
-
if not candidates:
|
161 |
-
return product, []
|
162 |
-
|
163 |
-
# Get the expanded description
|
164 |
-
expanded_text = expanded_descriptions.get(product, "")
|
165 |
-
|
166 |
-
try:
|
167 |
-
# Use rank_categories_openai instead of match_products_to_categories_with_description
|
168 |
-
category_matches = rank_categories_openai(
|
169 |
-
product=product,
|
170 |
-
categories=category_descriptions,
|
171 |
-
expanded_description=expanded_text,
|
172 |
-
client=openai_client,
|
173 |
-
# model="o3-mini",
|
174 |
-
model="gpt-4o-mini",
|
175 |
-
# model="gpt-4o",
|
176 |
-
max_results=top_n,
|
177 |
-
confidence_threshold=confidence_threshold,
|
178 |
-
debug=True
|
179 |
-
)
|
180 |
-
|
181 |
-
# Format results with category descriptions if needed
|
182 |
-
formatted_matches = []
|
183 |
-
for category_id, score in category_matches:
|
184 |
-
category_text = category_descriptions.get(category_id, "Unknown category")
|
185 |
-
formatted_matches.append((category_id, category_text, score))
|
186 |
-
|
187 |
-
return product, formatted_matches
|
188 |
-
except Exception as e:
|
189 |
-
print(f"Error matching {product} to categories: {e}")
|
190 |
-
return product, []
|
191 |
-
|
192 |
-
# Process all products in parallel
|
193 |
-
final_results = process_in_parallel(
|
194 |
-
items=product_names,
|
195 |
-
processor_func=process_category_matching,
|
196 |
-
max_workers=min(10, len(product_names)),
|
197 |
-
progress_tracker=progress_tracker,
|
198 |
-
progress_start=0.7,
|
199 |
-
progress_end=0.9,
|
200 |
-
progress_desc="Category matching"
|
201 |
-
)
|
202 |
-
|
203 |
-
# Format results
|
204 |
-
progress_tracker(0.9, desc="Formatting results...")
|
205 |
-
|
206 |
-
result_elements = []
|
207 |
-
for product, matches in final_results.items():
|
208 |
-
result_elements.append(
|
209 |
-
format_expanded_results_html(
|
210 |
-
product=product,
|
211 |
-
results=matches,
|
212 |
-
expanded_description=expanded_descriptions.get(product, ""),
|
213 |
-
match_type=match_type
|
214 |
-
)
|
215 |
-
)
|
216 |
-
|
217 |
-
output_html = create_results_container(
|
218 |
-
result_elements,
|
219 |
-
header_text=f"Matched {len(product_names)} products to {match_type} using expanded descriptions."
|
220 |
-
)
|
221 |
-
|
222 |
-
if not final_results:
|
223 |
-
output_html = "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
|
224 |
-
|
225 |
-
progress_tracker(1.0, desc="Done!")
|
226 |
-
return output_html
|
227 |
-
|
228 |
-
def categorize_products_with_openai_reranking(product_input, is_file=False, expansion_strength=0.0,
|
229 |
-
top_n=10, confidence_threshold=0.5, match_type="ingredients",
|
230 |
-
progress=gr.Progress()):
|
231 |
"""
|
232 |
Categorize products using OpenAI reranking with optional description expansion
|
233 |
-
|
234 |
-
Args:
|
235 |
-
product_input: Text input with product names
|
236 |
-
is_file: Whether the input is a file
|
237 |
-
expansion_strength: 0.0-1.0 slider value for description expansion (0=none, 1=full)
|
238 |
-
top_n: Number of top results to show
|
239 |
-
confidence_threshold: Confidence threshold for matches
|
240 |
-
match_type: Either "ingredients" or "categories"
|
241 |
-
progress: Progress tracking object
|
242 |
-
|
243 |
-
Returns:
|
244 |
-
HTML formatted results
|
245 |
"""
|
246 |
progress_tracker = SafeProgress(progress)
|
247 |
progress_tracker(0, desc="Starting OpenAI reranking...")
|
248 |
-
|
249 |
# Parse input
|
250 |
product_names, error = parse_input(product_input, is_file)
|
251 |
if error:
|
@@ -254,15 +25,11 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
|
|
254 |
# Validate embeddings are loaded if doing ingredient matching
|
255 |
if match_type == "ingredients" and not embeddings:
|
256 |
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No ingredient embeddings loaded. Please check that the embeddings file exists and is properly formatted.</div>"
|
257 |
-
|
258 |
# Optional description expansion
|
259 |
expanded_descriptions = {}
|
260 |
-
if
|
261 |
progress_tracker(0.2, desc="Expanding product descriptions...")
|
262 |
expanded_descriptions = expand_product_descriptions(product_names, progress=progress)
|
263 |
-
else:
|
264 |
-
# If no expansion, use product names as is (minimal descriptions)
|
265 |
-
expanded_descriptions = {product: product for product in product_names}
|
266 |
|
267 |
# Get shared OpenAI client
|
268 |
openai_client = get_openai_client()
|
@@ -279,9 +46,6 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
|
|
279 |
if not all_similarities:
|
280 |
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No similarities found. Please try different product names.</div>"
|
281 |
|
282 |
-
# Setup for OpenAI reranking
|
283 |
-
embedding_top_n = 20 # Number of candidates to consider from embeddings
|
284 |
-
|
285 |
progress_tracker(0.7, desc="Re-ranking with OpenAI...")
|
286 |
|
287 |
# Function for processing each product
|
@@ -294,10 +58,11 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
|
|
294 |
return product, []
|
295 |
|
296 |
candidate_ingredients = [c[0] for c in candidates]
|
297 |
-
expanded_text = expanded_descriptions.get(product, product)
|
298 |
|
299 |
try:
|
300 |
-
# Use the shared utility function
|
|
|
301 |
reranked_ingredients = rank_ingredients_openai(
|
302 |
product=product,
|
303 |
candidates=candidate_ingredients,
|
@@ -305,7 +70,7 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
|
|
305 |
client=openai_client,
|
306 |
model="o3-mini",
|
307 |
max_results=top_n,
|
308 |
-
confidence_threshold=
|
309 |
debug=True
|
310 |
)
|
311 |
|
@@ -314,7 +79,7 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
|
|
314 |
except Exception as e:
|
315 |
print(f"Error reranking {product}: {e}")
|
316 |
# Fall back to top embedding match
|
317 |
-
return product, candidates[:1]
|
318 |
|
319 |
# Process all products in parallel
|
320 |
final_results = process_in_parallel(
|
@@ -346,14 +111,11 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
|
|
346 |
if not all_similarities:
|
347 |
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No category similarities found. Please try different product names.</div>"
|
348 |
|
349 |
-
|
350 |
-
|
351 |
-
# Collect all needed category IDs first
|
352 |
needed_category_ids = set()
|
353 |
for product, similarities in all_similarities.items():
|
354 |
for category_id, score in similarities[:embedding_top_n]:
|
355 |
-
|
356 |
-
needed_category_ids.add(category_id)
|
357 |
|
358 |
# Load only the needed categories from JSON
|
359 |
progress_tracker(0.75, desc="Loading category descriptions...")
|
@@ -378,9 +140,10 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
|
|
378 |
return product, []
|
379 |
|
380 |
# Get the expanded description or use product name if no expansion
|
381 |
-
expanded_text = expanded_descriptions.get(product, product)
|
382 |
|
383 |
try:
|
|
|
384 |
category_matches = rank_categories_openai(
|
385 |
product=product,
|
386 |
categories=category_descriptions,
|
@@ -388,7 +151,7 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
|
|
388 |
client=openai_client,
|
389 |
model="gpt-4o-mini",
|
390 |
max_results=top_n,
|
391 |
-
confidence_threshold=
|
392 |
debug=True
|
393 |
)
|
394 |
|
@@ -417,24 +180,43 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
|
|
417 |
# Format results
|
418 |
progress_tracker(0.9, desc="Formatting results...")
|
419 |
|
420 |
-
|
|
|
|
|
421 |
for product, matches in final_results.items():
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
)
|
429 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
430 |
|
431 |
-
|
432 |
-
|
433 |
-
header_text=f"Matched {len(product_names)} products to {match_type} using OpenAI reranking."
|
434 |
-
)
|
435 |
|
436 |
-
|
437 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
438 |
|
439 |
progress_tracker(1.0, desc="Done!")
|
440 |
-
return
|
|
|
4 |
from similarity import compute_similarities
|
5 |
from openai_expansion import expand_product_descriptions
|
6 |
from ui_core import embeddings, parse_input, CATEGORY_EMBEDDINGS_PATH
|
7 |
+
from ui_formatters import format_reranking_results_html
|
8 |
from api_utils import get_openai_client, process_in_parallel, rank_ingredients_openai, rank_categories_openai
|
9 |
from category_matching import load_categories, load_category_embeddings
|
10 |
import json
|
|
|
11 |
|
12 |
+
def categorize_products_with_openai_reranking(product_input, is_file=False, use_expansion=False,
|
13 |
+
embedding_top_n=20, top_n=10, confidence_threshold=0.5,
|
14 |
+
match_type="ingredients", progress=gr.Progress()):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
"""
|
16 |
Categorize products using OpenAI reranking with optional description expansion
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
"""
|
18 |
progress_tracker = SafeProgress(progress)
|
19 |
progress_tracker(0, desc="Starting OpenAI reranking...")
|
|
|
20 |
# Parse input
|
21 |
product_names, error = parse_input(product_input, is_file)
|
22 |
if error:
|
|
|
25 |
# Validate embeddings are loaded if doing ingredient matching
|
26 |
if match_type == "ingredients" and not embeddings:
|
27 |
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No ingredient embeddings loaded. Please check that the embeddings file exists and is properly formatted.</div>"
|
|
|
28 |
# Optional description expansion
|
29 |
expanded_descriptions = {}
|
30 |
+
if use_expansion:
|
31 |
progress_tracker(0.2, desc="Expanding product descriptions...")
|
32 |
expanded_descriptions = expand_product_descriptions(product_names, progress=progress)
|
|
|
|
|
|
|
33 |
|
34 |
# Get shared OpenAI client
|
35 |
openai_client = get_openai_client()
|
|
|
46 |
if not all_similarities:
|
47 |
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No similarities found. Please try different product names.</div>"
|
48 |
|
|
|
|
|
|
|
49 |
progress_tracker(0.7, desc="Re-ranking with OpenAI...")
|
50 |
|
51 |
# Function for processing each product
|
|
|
58 |
return product, []
|
59 |
|
60 |
candidate_ingredients = [c[0] for c in candidates]
|
61 |
+
expanded_text = expanded_descriptions.get(product, product) if use_expansion else product
|
62 |
|
63 |
try:
|
64 |
+
# Use the shared utility function - now passing 0.0 as threshold to get all results
|
65 |
+
# We'll apply the threshold at display time
|
66 |
reranked_ingredients = rank_ingredients_openai(
|
67 |
product=product,
|
68 |
candidates=candidate_ingredients,
|
|
|
70 |
client=openai_client,
|
71 |
model="o3-mini",
|
72 |
max_results=top_n,
|
73 |
+
confidence_threshold=0.0, # Don't filter here, do it at display time
|
74 |
debug=True
|
75 |
)
|
76 |
|
|
|
79 |
except Exception as e:
|
80 |
print(f"Error reranking {product}: {e}")
|
81 |
# Fall back to top embedding match
|
82 |
+
return product, candidates[:1] # Don't filter here
|
83 |
|
84 |
# Process all products in parallel
|
85 |
final_results = process_in_parallel(
|
|
|
111 |
if not all_similarities:
|
112 |
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No category similarities found. Please try different product names.</div>"
|
113 |
|
114 |
+
# Collect all needed category IDs first - don't filter by threshold here
|
|
|
|
|
115 |
needed_category_ids = set()
|
116 |
for product, similarities in all_similarities.items():
|
117 |
for category_id, score in similarities[:embedding_top_n]:
|
118 |
+
needed_category_ids.add(category_id)
|
|
|
119 |
|
120 |
# Load only the needed categories from JSON
|
121 |
progress_tracker(0.75, desc="Loading category descriptions...")
|
|
|
140 |
return product, []
|
141 |
|
142 |
# Get the expanded description or use product name if no expansion
|
143 |
+
expanded_text = expanded_descriptions.get(product, product) if use_expansion else product
|
144 |
|
145 |
try:
|
146 |
+
# Pass 0.0 as threshold to get all results - apply threshold at display time
|
147 |
category_matches = rank_categories_openai(
|
148 |
product=product,
|
149 |
categories=category_descriptions,
|
|
|
151 |
client=openai_client,
|
152 |
model="gpt-4o-mini",
|
153 |
max_results=top_n,
|
154 |
+
confidence_threshold=0.0, # Don't filter here
|
155 |
debug=True
|
156 |
)
|
157 |
|
|
|
180 |
# Format results
|
181 |
progress_tracker(0.9, desc="Formatting results...")
|
182 |
|
183 |
+
# Create a list of result dictionaries in consistent format
|
184 |
+
formatted_results = []
|
185 |
+
|
186 |
for product, matches in final_results.items():
|
187 |
+
# Include all products, even with no matches
|
188 |
+
formatted_result = {
|
189 |
+
"product_name": product,
|
190 |
+
"confidence": max([item[-1] for item in matches]) if matches else 0,
|
191 |
+
"matching_items": [],
|
192 |
+
"item_scores": [], # Add item_scores to align with Voyage implementation
|
193 |
+
"explanation": expanded_descriptions.get(product, "") if use_expansion else ""
|
194 |
+
}
|
195 |
+
|
196 |
+
# Format matching items based on match type
|
197 |
+
if match_type == "ingredients":
|
198 |
+
formatted_result["matching_items"] = [item for item, score in matches]
|
199 |
+
formatted_result["item_scores"] = [score for item, score in matches]
|
200 |
+
else: # categories
|
201 |
+
for cat_id, cat_desc, score in matches:
|
202 |
+
formatted_result["matching_items"].append(
|
203 |
+
f"{cat_id}: {cat_desc}" if cat_desc else f"{cat_id}"
|
204 |
+
)
|
205 |
+
formatted_result["item_scores"].append(score)
|
206 |
+
|
207 |
+
formatted_results.append(formatted_result)
|
208 |
|
209 |
+
if not formatted_results:
|
210 |
+
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
|
|
|
|
|
211 |
|
212 |
+
result_html = format_reranking_results_html(
|
213 |
+
results=formatted_results,
|
214 |
+
match_type=match_type,
|
215 |
+
show_scores=True,
|
216 |
+
include_explanation=use_expansion,
|
217 |
+
method="openai",
|
218 |
+
confidence_threshold=confidence_threshold # Pass the threshold to the formatter
|
219 |
+
)
|
220 |
|
221 |
progress_tracker(1.0, desc="Done!")
|
222 |
+
return result_html
|
ui_formatters.py
CHANGED
@@ -190,34 +190,92 @@ def format_comparison_html(product, method_results):
|
|
190 |
# Create the full card with the methods content
|
191 |
return format_result_card(title=product, content=methods_html)
|
192 |
|
193 |
-
def
|
194 |
-
|
195 |
-
|
|
|
|
|
196 |
|
197 |
-
|
198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
-
#
|
201 |
-
|
202 |
-
|
203 |
|
204 |
-
|
205 |
-
|
206 |
-
def format_hybrid_results_html(product, results, summary, expanded_description=""):
|
207 |
-
"""Format the hybrid matching results as HTML."""
|
208 |
-
content = ""
|
209 |
-
|
210 |
-
# Add expanded description if provided
|
211 |
-
if expanded_description:
|
212 |
-
content += format_info_panel("Expanded Description", expanded_description)
|
213 |
|
214 |
-
#
|
215 |
-
|
216 |
|
217 |
-
|
218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
-
|
|
|
221 |
|
222 |
def create_results_container(html_elements, header_text=None):
|
223 |
"""
|
@@ -240,46 +298,136 @@ def create_results_container(html_elements, header_text=None):
|
|
240 |
|
241 |
return container
|
242 |
|
243 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
"""
|
245 |
-
Format
|
246 |
|
247 |
Args:
|
248 |
product: Product name
|
249 |
-
|
250 |
-
|
251 |
-
|
|
|
252 |
|
253 |
Returns:
|
254 |
-
HTML string
|
255 |
"""
|
256 |
content = ""
|
257 |
|
258 |
-
# Add
|
259 |
-
if
|
260 |
content += f"<div style='{STYLES['info_panel']}'>"
|
261 |
-
content += "<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
|
|
267 |
|
268 |
-
content +=
|
269 |
-
content += f"<
|
270 |
-
content += f"<
|
271 |
-
content += "
|
272 |
-
|
273 |
-
content += f"
|
|
|
274 |
|
|
|
275 |
content += "</div>"
|
276 |
-
|
277 |
-
|
278 |
-
content += format_method_results(
|
279 |
-
method_key="categories",
|
280 |
-
results=categories,
|
281 |
-
color_hex=header_color or METHOD_COLORS.get("categories", "#1abc9c")
|
282 |
-
)
|
283 |
|
284 |
return format_result_card(title=product, content=content)
|
285 |
|
@@ -288,7 +436,7 @@ def get_formatted_css():
|
|
288 |
Generate CSS for the UI based on current theme
|
289 |
|
290 |
Returns:
|
291 |
-
CSS string
|
292 |
"""
|
293 |
return f"""
|
294 |
.gradio-container .prose {{
|
@@ -336,102 +484,73 @@ def get_formatted_css():
|
|
336 |
|
337 |
def set_theme(theme_name):
|
338 |
"""
|
339 |
-
|
340 |
|
341 |
Args:
|
342 |
-
theme_name:
|
343 |
|
344 |
Returns:
|
345 |
-
|
346 |
"""
|
347 |
global THEME, COLORS, STYLES
|
348 |
-
|
349 |
if theme_name in THEMES:
|
350 |
THEME = theme_name
|
351 |
COLORS = THEMES[THEME]
|
352 |
-
|
353 |
-
|
354 |
-
STYLES.update({
|
355 |
"card": f"margin-bottom: 20px; border: 1px solid {COLORS['card_border']}; border-radius: 8px; overflow: hidden; background-color: {COLORS['card_bg']};",
|
356 |
"header": f"background-color: {COLORS['header_bg']}; padding: 12px 15px; border-bottom: 1px solid {COLORS['card_border']};",
|
357 |
"header_text": f"margin: 0; font-size: 18px; color: {COLORS['header_text']};",
|
|
|
358 |
"method_container": f"flex: 1; min-width: 200px; padding: 15px; border-right: 1px solid {COLORS['card_border']};",
|
359 |
"method_title": f"margin-top: 0; color: {COLORS['text_primary']}; padding-bottom: 8px;",
|
|
|
|
|
|
|
360 |
"info_panel": f"padding: 10px; background-color: {COLORS['section_bg']}; margin-bottom: 10px; border-radius: 4px;"
|
361 |
-
}
|
|
|
|
|
362 |
|
363 |
-
def
|
364 |
-
"""Format a single result item with confidence badge.
|
365 |
-
|
366 |
-
Args:
|
367 |
-
result: Tuple containing (name, score) or (id, name, score)
|
368 |
-
|
369 |
-
Returns:
|
370 |
-
HTML string for the result item or None if invalid format
|
371 |
"""
|
372 |
-
|
373 |
-
if len(result) == 3:
|
374 |
-
category_id, category_name, score = result
|
375 |
-
display_text = f"<strong>{category_id}</strong>: {category_name}"
|
376 |
-
elif len(result) == 2:
|
377 |
-
display_text, score = result
|
378 |
-
else:
|
379 |
-
return None # Skip any invalid formats
|
380 |
-
|
381 |
-
confidence_percent = int(score * 100)
|
382 |
-
confidence_color = get_confidence_color(score)
|
383 |
-
bg_color = get_confidence_bg_color(score)
|
384 |
-
|
385 |
-
item_html = f"<li style='display: flex; justify-content: space-between; align-items: center; margin-bottom: 4px;'>"
|
386 |
-
item_html += f"<span style='font-weight: 500; flex: 1;'>{display_text}</span>"
|
387 |
-
item_html += f"<span style='background-color: {bg_color}; border: 1px solid {confidence_color}; color: #000; font-weight: 600; padding: 2px 6px; border-radius: 4px; min-width: 70px; text-align: center; margin-left: 8px;'>Confidence: {confidence_percent}%</span>"
|
388 |
-
item_html += "</li>"
|
389 |
-
|
390 |
-
return item_html
|
391 |
-
|
392 |
-
def format_info_panel(title, content):
|
393 |
-
"""Format an information panel with title and content.
|
394 |
|
395 |
Args:
|
396 |
-
|
397 |
-
|
|
|
|
|
398 |
|
399 |
Returns:
|
400 |
-
HTML string
|
401 |
"""
|
402 |
-
|
403 |
-
panel_html += f"<h4 style='margin-top: 0; border-bottom: 1px solid rgba(0,0,0,0.1); padding-bottom: 8px;'>{title}</h4>"
|
404 |
-
panel_html += f"<p style='margin-bottom: 8px;'>{content}</p>"
|
405 |
-
panel_html += "</div>"
|
406 |
-
|
407 |
-
return panel_html
|
408 |
-
|
409 |
-
def format_results_section(results, section_title, match_type="ingredients"):
|
410 |
-
"""Format a results section with title and result items.
|
411 |
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
section_html += "</ul>"
|
432 |
-
else:
|
433 |
-
section_html += "<p style='color: #777; font-style: italic; margin: 5px 0;'>No matches found above confidence threshold.</p>"
|
434 |
-
|
435 |
-
section_html += "</div>"
|
436 |
|
437 |
-
return
|
|
|
190 |
# Create the full card with the methods content
|
191 |
return format_result_card(title=product, content=methods_html)
|
192 |
|
193 |
+
def format_reranking_results_html(results, match_type="ingredients", show_scores=True, include_explanation=False,
                                  method="voyage", confidence_threshold=0.0):
    """
    Render reranking results (Voyage or OpenAI) as one HTML card per product.

    Args:
        results: List of result dictionaries. The keys read here are
            "product_name", "matching_items", "item_scores", "confidence"
            and "explanation".
        match_type: Either "ingredients" or "categories"
        show_scores: Accepted for interface compatibility; this function does
            not consult it (the card formatters always render a score).
        include_explanation: When true, each card is built with
            format_expanded_results_html and carries the result's
            "explanation"; otherwise format_hybrid_results_html is used.
        method: Key looked up in METHOD_NAMES for the display name
            (falls back to the capitalized key).
        confidence_threshold: Forwarded to the card formatter, which drops
            individual items scoring below it (default 0.0 shows all).

    Returns:
        HTML string for displaying results, or a plain-text message when
        ``results`` is empty.
    """
    if not results:
        return f"No {match_type.lower()} matches found."

    method_name = METHOD_NAMES.get(method, method.capitalize())
    header_text = f"Matched {len(results)} products to {match_type} using {method_name}"

    html_elements = []
    for result in results:
        product_name = result.get("product_name", "")
        matching_items = result.get("matching_items", [])
        item_scores = result.get("item_scores", [])
        explanation = result.get("explanation", "") if include_explanation else ""

        # If per-item scores are missing or misaligned, fall back to the
        # result's overall confidence for every item.
        if len(item_scores) != len(matching_items):
            item_scores = [result.get("confidence", 0.5)] * len(matching_items)

        # Rebuild the tuples the card formatters expect:
        # (id, text, score) for "id: text" categories, (name, score) otherwise.
        formatted_matches = []
        for item, score in zip(matching_items, item_scores):
            if match_type == "categories" and ":" in item:
                cat_id, _, cat_text = item.partition(":")
                formatted_matches.append((cat_id.strip(), cat_text.strip(), score))
            else:
                formatted_matches.append((item, score))

        # Only skip if there are no matches at all
        if not formatted_matches:
            continue

        if include_explanation:
            element_html = format_expanded_results_html(
                product=product_name,
                results=formatted_matches,
                expanded_description=explanation,
                match_type=match_type,
                confidence_threshold=confidence_threshold
            )
        else:
            summary_text = f"{match_type.capitalize()} matches using {method_name}."
            element_html = format_hybrid_results_html(
                product=product_name,
                results=formatted_matches,
                summary=summary_text,
                expanded_description="",
                confidence_threshold=confidence_threshold
            )

        html_elements.append(element_html)

    # Combine all elements into a container
    return create_results_container(html_elements, header_text=header_text)
|
279 |
|
280 |
def create_results_container(html_elements, header_text=None):
|
281 |
"""
|
|
|
298 |
|
299 |
return container
|
300 |
|
301 |
+
def filter_results_by_threshold(results, confidence_threshold=0.0):
    """
    Keep only the result items whose score reaches the confidence threshold.

    Args:
        results: Iterable of result items. A tuple with at least two
            elements, such as (match, score) or (id, text, score), is scored
            by its last element; anything else is scored 0.0.
        confidence_threshold: Minimum score (inclusive) an item must have.

    Returns:
        A new list with the surviving items in their original order.
        An empty or None ``results`` yields an empty list.
    """
    if not results:
        return []

    def _score(item):
        # The score is always the last slot of a well-formed result tuple.
        if isinstance(item, tuple) and len(item) >= 2:
            return item[-1]
        return 0.0

    return [item for item in results if _score(item) >= confidence_threshold]
|
311 |
+
|
312 |
+
def parse_result_item(item):
    """
    Split a result item into its display text and score.

    Args:
        item: A (match, score) tuple, an (id, text, score) tuple, or any
            other value.

    Returns:
        Tuple ``(display_text, score)``. ``display_text`` is always a string:
        the match itself for 2-tuples, "id: text" (or just the id when text
        is empty) for 3-tuples. Anything else is rendered with ``str(item)``
        and scored 0.0.
    """
    if isinstance(item, tuple):
        if len(item) == 2:
            match, score = item
            return str(match), score
        if len(item) == 3:
            cat_id, cat_text, score = item
            display_text = f"{cat_id}: {cat_text}" if cat_text else str(cat_id)
            return display_text, score

    # Unrecognized shape: show it verbatim with no confidence.
    return str(item), 0.0
|
329 |
+
|
330 |
+
def format_expanded_results_html(product, results, expanded_description, match_type="ingredients", confidence_threshold=0.0):
    """
    Format one product's results as a card that leads with its expanded description.

    Args:
        product: Product name, used as the card title
        results: List of result tuples (name, score) or (id, name, score)
        expanded_description: Description text shown in the info panel;
            it is HTML-escaped before being embedded
        match_type: "ingredients" selects the "Ingredients" heading; any
            other value selects "Categories". Also used as the
            METHOD_COLORS lookup key for the accent color.
        confidence_threshold: Items scoring below this are not listed

    Returns:
        HTML string for displaying results
    """
    # Local import so the block does not depend on the file's import header.
    from html import escape

    color_hex = METHOD_COLORS.get(match_type, "#1abc9c")
    title_text = "Ingredients" if match_type == "ingredients" else "Categories"

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        # Expanded description section
        f"<div style='{STYLES['info_panel']}'>",
        "<h4 style='margin-top: 0; border-bottom: 1px solid rgba(0,0,0,0.1); padding-bottom: 8px;'>Expanded Description</h4>",
        f"<p style='margin-bottom: 8px;'>{escape(str(expanded_description))}</p>",
        "</div>",
        # Results section with custom title
        f"<div class='method-results' style='margin-top: 15px; border-left: 3px solid {color_hex}; padding-left: 15px;'>",
        f"<h4 style='margin-top: 0; color: {color_hex};'>{title_text}</h4>",
    ]

    filtered_results = filter_results_by_threshold(results, confidence_threshold)

    if filtered_results:
        parts.append("<ul style='margin-top: 5px; padding-left: 20px;'>")
        for item in filtered_results:
            display_text, score = parse_result_item(item)
            # round() rather than int(): int(0.29 * 100) truncates to 28.
            confidence_percent = int(round(float(score) * 100))
            confidence_color = get_confidence_color(score)
            bg_color = get_confidence_bg_color(score)
            parts.append("<li style='display: flex; justify-content: space-between; align-items: center; margin-bottom: 4px;'>")
            parts.append(f"<span style='font-weight: 500; flex: 1;'>{escape(str(display_text))}</span>")
            # Black text on the tinted badge for contrast.
            parts.append(f"<span style='background-color: {bg_color}; border: 1px solid {confidence_color}; color: #000; font-weight: 600; padding: 2px 6px; border-radius: 4px; min-width: 70px; text-align: center; margin-left: 8px;'>Confidence: {confidence_percent}%</span>")
            parts.append("</li>")
        parts.append("</ul>")
    else:
        parts.append("<p style='color: #777; font-style: italic; margin: 5px 0;'>No matches found above confidence threshold.</p>")

    parts.append("</div>")

    return format_result_card(title=product, content="".join(parts))
|
370 |
+
|
371 |
+
def format_hybrid_results_html(product, results, summary, expanded_description="", confidence_threshold=0.0):
    """
    Format results for hybrid matching as a card containing a match/confidence table.

    Args:
        product: Product name, used as the card title
        results: List of result tuples (name, score) or (id, name, score)
        summary: Summary text to display; embedded as-is, skipped when empty
        expanded_description: Optional expanded description; HTML-escaped
            before being embedded, skipped when empty
        confidence_threshold: Threshold for filtering individual items

    Returns:
        HTML string for displaying results
    """
    # Local import so the block does not depend on the file's import header.
    from html import escape

    # Collect fragments and join once instead of repeated string +=.
    parts = []

    # Summary panel
    if summary:
        parts.append(f"<div style='{STYLES['info_panel']}'>")
        parts.append(f"<p style='margin: 0;'>{summary}</p>")
        parts.append("</div>")

    # Expanded description panel
    if expanded_description:
        parts.append(f"<div style='{STYLES['info_panel']}'>")
        parts.append("<h4 style='margin-top: 0; margin-bottom: 8px; border-bottom: 1px solid rgba(0,0,0,0.1); padding-bottom: 5px;'>Expanded Description</h4>")
        parts.append(f"<p style='margin: 0;'>{escape(str(expanded_description))}</p>")
        parts.append("</div>")

    filtered_results = filter_results_by_threshold(results, confidence_threshold)

    if filtered_results:
        parts.append("<div style='padding: 10px;'>")
        parts.append("<table style='width: 100%; border-collapse: collapse;'>")
        parts.append("<thead><tr>")
        parts.append("<th style='text-align: left; padding: 8px; border-bottom: 2px solid #ddd;'>Match</th>")
        parts.append("<th style='text-align: right; padding: 8px; border-bottom: 2px solid #ddd; width: 100px;'>Confidence</th>")
        parts.append("</tr></thead>")
        parts.append("<tbody>")

        for item in filtered_results:
            display_text, score = parse_result_item(item)
            # round() rather than int(): int(0.29 * 100) truncates to 28.
            confidence_percent = int(round(float(score) * 100))
            confidence_color = get_confidence_color(score)
            bg_color = get_confidence_bg_color(score)

            parts.append("<tr>")
            parts.append(f"<td style='text-align: left; padding: 8px; border-bottom: 1px solid #ddd;'>{escape(str(display_text))}</td>")
            parts.append("<td style='text-align: center; padding: 8px; border-bottom: 1px solid #ddd;'>")
            parts.append(f"<span style='background-color: {bg_color}; border: 1px solid {confidence_color}; color: #000;")
            parts.append("font-weight: 600; padding: 2px 6px; border-radius: 4px; display: inline-block; width: 70px;'>")
            parts.append(f"{confidence_percent}%</span></td>")
            parts.append("</tr>")

        parts.append("</tbody></table>")
        parts.append("</div>")
    else:
        parts.append("<p style='color: #777; font-style: italic; padding: 10px; margin: 0;'>No matches found above confidence threshold.</p>")

    return format_result_card(title=product, content="".join(parts))
|
433 |
|
|
|
436 |
Generate CSS for the UI based on current theme
|
437 |
|
438 |
Returns:
|
439 |
+
CSS string for styling the UI
|
440 |
"""
|
441 |
return f"""
|
442 |
.gradio-container .prose {{
|
|
|
484 |
|
485 |
def set_theme(theme_name):
    """
    Switch the module-wide theme and rebuild the derived style table.

    Args:
        theme_name: Key into THEMES naming the theme to activate

    Returns:
        True when the theme was applied, False when ``theme_name`` is not
        a known theme (the globals are left untouched in that case)
    """
    global THEME, COLORS, STYLES

    # Unknown theme: report failure without touching the current settings.
    if theme_name not in THEMES:
        return False

    THEME = theme_name
    COLORS = THEMES[THEME]

    # Theme-dependent entries are re-rendered from the new palette; the
    # remaining entries are fixed strings.
    rebuilt_styles = {}
    rebuilt_styles["card"] = f"margin-bottom: 20px; border: 1px solid {COLORS['card_border']}; border-radius: 8px; overflow: hidden; background-color: {COLORS['card_bg']};"
    rebuilt_styles["header"] = f"background-color: {COLORS['header_bg']}; padding: 12px 15px; border-bottom: 1px solid {COLORS['card_border']};"
    rebuilt_styles["header_text"] = f"margin: 0; font-size: 18px; color: {COLORS['header_text']};"
    rebuilt_styles["flex_container"] = "display: flex; flex-wrap: wrap;"
    rebuilt_styles["method_container"] = f"flex: 1; min-width: 200px; padding: 15px; border-right: 1px solid {COLORS['card_border']};"
    rebuilt_styles["method_title"] = f"margin-top: 0; color: {COLORS['text_primary']}; padding-bottom: 8px;"
    rebuilt_styles["item_list"] = "list-style-type: none; padding-left: 0;"
    rebuilt_styles["item"] = "margin-bottom: 8px; padding: 8px; border-radius: 4px;"
    rebuilt_styles["empty_message"] = "color: #7f8c8d; font-style: italic;"
    rebuilt_styles["info_panel"] = f"padding: 10px; background-color: {COLORS['section_bg']}; margin-bottom: 10px; border-radius: 4px;"

    STYLES = rebuilt_styles
    return True
|
514 |
|
515 |
+
def format_categories_html(product, categories, chicory_result=None, header_color=None):
    """
    Format category matching results as HTML

    Args:
        product: Product name, used as the card title
        categories: Category results, passed unchanged to
            format_method_results (presumably (category, score) tuples;
            confirm against that helper)
        chicory_result: Optional chicory parser result for the product.
            A dict is read for its "ingredient" and "confidence" keys; any
            other truthy value renders a "No Chicory results available" note.
        header_color: Optional accent color; defaults to the "categories"
            entry of METHOD_COLORS

    Returns:
        HTML string
    """
    # Local import so the block does not depend on the file's import header.
    from html import escape

    parts = []

    # Chicory panel, only when a parser result was supplied
    if chicory_result:
        parts.append(f"<div style='{STYLES['info_panel']}'>")
        parts.append("<h4 style='margin-top: 0; border-bottom: 1px solid rgba(0,0,0,0.1); padding-bottom: 8px;'>Chicory Parser Results</h4>")

        if isinstance(chicory_result, dict):
            ingredient = chicory_result.get("ingredient", "Not found")
            # A key that is present but None would break the arithmetic below.
            confidence = chicory_result.get("confidence") or 0
            # round() rather than int(): int(0.29 * 100) truncates to 28.
            confidence_percent = int(round(float(confidence) * 100))

            parts.append("<div style='display: flex; justify-content: space-between; align-items: center; padding: 8px; border-radius: 4px;'>")
            parts.append(f"<span style='font-weight: bold;'>{escape(str(ingredient))}</span>")
            parts.append(f"<span style='background-color: {get_confidence_bg_color(confidence)}; border: 1px solid {get_confidence_color(confidence)}; color: #000; font-weight: 600; padding: 2px 6px; border-radius: 4px; min-width: 70px; text-align: center;'>Confidence: {confidence_percent}%</span>")
            parts.append("</div>")
        else:
            parts.append(f"<p style='{STYLES['empty_message']}'>No Chicory results available</p>")

        parts.append("</div>")

    # Category results
    parts.append(format_method_results(
        method_key="categories",
        results=categories,
        color_hex=header_color or METHOD_COLORS.get("categories", "#1abc9c")
    ))

    return format_result_card(title=product, content="".join(parts))
|
ui_hybrid_matching.py
CHANGED
@@ -3,7 +3,7 @@ from utils import SafeProgress
|
|
3 |
from category_matching import load_categories, hybrid_category_matching
|
4 |
from similarity import hybrid_ingredient_matching, compute_similarities
|
5 |
from ui_core import embeddings, parse_input
|
6 |
-
from ui_formatters import format_hybrid_results_html, create_results_container
|
7 |
from openai_expansion import expand_product_descriptions
|
8 |
from api_utils import get_voyage_client
|
9 |
|
@@ -12,16 +12,6 @@ def categorize_products_with_voyage_reranking(product_input, is_file=False, use_
|
|
12 |
match_type="categories", progress=gr.Progress()):
|
13 |
"""
|
14 |
Categorize products using Voyage reranking with optional description expansion
|
15 |
-
|
16 |
-
Args:
|
17 |
-
product_input: Text input with product names
|
18 |
-
is_file: Whether the input is a file
|
19 |
-
use_expansion: Whether to use AI description expansion (boolean switch)
|
20 |
-
embedding_top_n: Number of embedding candidates to consider
|
21 |
-
final_top_n: Final number of results to return
|
22 |
-
confidence_threshold: Minimum confidence threshold
|
23 |
-
match_type: Either "ingredients" or "categories"
|
24 |
-
progress: Progress tracking object
|
25 |
"""
|
26 |
progress_tracker = SafeProgress(progress)
|
27 |
progress_tracker(0, desc=f"Starting Voyage reranking for {match_type}...")
|
@@ -49,7 +39,7 @@ def categorize_products_with_voyage_reranking(product_input, is_file=False, use_
|
|
49 |
product_names, categories,
|
50 |
embedding_top_n=int(embedding_top_n),
|
51 |
final_top_n=int(final_top_n),
|
52 |
-
confidence_threshold=
|
53 |
expanded_descriptions=expanded_descriptions if use_expansion else None,
|
54 |
progress=progress
|
55 |
)
|
@@ -64,7 +54,7 @@ def categorize_products_with_voyage_reranking(product_input, is_file=False, use_
|
|
64 |
product_names, embeddings,
|
65 |
embedding_top_n=int(embedding_top_n),
|
66 |
final_top_n=int(final_top_n),
|
67 |
-
confidence_threshold=
|
68 |
expanded_descriptions=expanded_descriptions if use_expansion else None,
|
69 |
progress=progress
|
70 |
)
|
@@ -72,31 +62,55 @@ def categorize_products_with_voyage_reranking(product_input, is_file=False, use_
|
|
72 |
# Format results
|
73 |
progress_tracker(0.9, desc="Formatting results...")
|
74 |
|
75 |
-
|
|
|
76 |
for product, matches in match_results.items():
|
77 |
-
# Include
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
91 |
)
|
92 |
|
93 |
-
if not match_results:
|
94 |
-
output_html = "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
|
95 |
-
|
96 |
progress_tracker(1.0, desc="Done!")
|
97 |
-
return
|
98 |
|
99 |
-
#
|
100 |
def hybrid_ingredient_matching_voyage(products, ingredients_dict,
|
101 |
embedding_top_n=20, final_top_n=5,
|
102 |
confidence_threshold=0.5,
|
@@ -152,13 +166,13 @@ def hybrid_ingredient_matching_voyage(products, ingredients_dict,
|
|
152 |
model="rerank-2"
|
153 |
)
|
154 |
|
155 |
-
# Process results
|
156 |
voyage_results = []
|
157 |
for result in reranked["results"]:
|
158 |
score = result["relevance_score"]
|
159 |
-
|
160 |
-
voyage_results.append((result["document"]["text"], score))
|
161 |
|
|
|
162 |
final_results[product] = voyage_results[:final_top_n]
|
163 |
|
164 |
except Exception as e:
|
@@ -167,4 +181,4 @@ def hybrid_ingredient_matching_voyage(products, ingredients_dict,
|
|
167 |
final_results[product] = candidates[:1]
|
168 |
|
169 |
progress_tracker(1.0, desc="Voyage ingredient matching complete")
|
170 |
-
return final_results
|
|
|
3 |
from category_matching import load_categories, hybrid_category_matching
|
4 |
from similarity import hybrid_ingredient_matching, compute_similarities
|
5 |
from ui_core import embeddings, parse_input
|
6 |
+
from ui_formatters import format_hybrid_results_html, create_results_container, format_reranking_results_html
|
7 |
from openai_expansion import expand_product_descriptions
|
8 |
from api_utils import get_voyage_client
|
9 |
|
|
|
12 |
match_type="categories", progress=gr.Progress()):
|
13 |
"""
|
14 |
Categorize products using Voyage reranking with optional description expansion
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
"""
|
16 |
progress_tracker = SafeProgress(progress)
|
17 |
progress_tracker(0, desc=f"Starting Voyage reranking for {match_type}...")
|
|
|
39 |
product_names, categories,
|
40 |
embedding_top_n=int(embedding_top_n),
|
41 |
final_top_n=int(final_top_n),
|
42 |
+
confidence_threshold=0.0, # Don't apply threshold here - do it in display
|
43 |
expanded_descriptions=expanded_descriptions if use_expansion else None,
|
44 |
progress=progress
|
45 |
)
|
|
|
54 |
product_names, embeddings,
|
55 |
embedding_top_n=int(embedding_top_n),
|
56 |
final_top_n=int(final_top_n),
|
57 |
+
confidence_threshold=0.0, # Don't apply threshold here - do it in display
|
58 |
expanded_descriptions=expanded_descriptions if use_expansion else None,
|
59 |
progress=progress
|
60 |
)
|
|
|
62 |
# Format results
|
63 |
progress_tracker(0.9, desc="Formatting results...")
|
64 |
|
65 |
+
# Convert to unified format for formatter
|
66 |
+
formatted_results = []
|
67 |
for product, matches in match_results.items():
|
68 |
+
# Include all products, even with no matches
|
69 |
+
formatted_result = {
|
70 |
+
"product_name": product,
|
71 |
+
"confidence": max([item[-1] for item in matches]) if matches else 0,
|
72 |
+
"matching_items": [],
|
73 |
+
"item_scores": [],
|
74 |
+
"explanation": expanded_descriptions.get(product, "") if use_expansion else ""
|
75 |
+
}
|
76 |
+
|
77 |
+
# Format matching items based on match type
|
78 |
+
if match_type == "ingredients":
|
79 |
+
# Extract ingredient names and scores
|
80 |
+
formatted_result["matching_items"] = [item[0] for item in matches]
|
81 |
+
formatted_result["item_scores"] = [item[1] for item in matches]
|
82 |
+
else: # categories
|
83 |
+
for match in matches:
|
84 |
+
if len(match) >= 2:
|
85 |
+
cat_id = match[0]
|
86 |
+
# Some category matches might include a text description
|
87 |
+
cat_text = match[1] if len(match) > 2 else ""
|
88 |
+
score = match[-1]
|
89 |
+
if isinstance(cat_text, (int, float)): # This is not text but a score
|
90 |
+
cat_text = ""
|
91 |
+
formatted_result["matching_items"].append(
|
92 |
+
f"{cat_id}: {cat_text}" if cat_text else f"{cat_id}"
|
93 |
+
)
|
94 |
+
formatted_result["item_scores"].append(score)
|
95 |
+
|
96 |
+
formatted_results.append(formatted_result)
|
97 |
+
|
98 |
+
if not formatted_results:
|
99 |
+
return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
|
100 |
|
101 |
+
result_html = format_reranking_results_html(
|
102 |
+
results=formatted_results,
|
103 |
+
match_type=match_type,
|
104 |
+
show_scores=True,
|
105 |
+
include_explanation=use_expansion,
|
106 |
+
method="voyage",
|
107 |
+
confidence_threshold=confidence_threshold # Pass the threshold to the formatter
|
108 |
)
|
109 |
|
|
|
|
|
|
|
110 |
progress_tracker(1.0, desc="Done!")
|
111 |
+
return result_html
|
112 |
|
113 |
+
# Update the function in ui_hybrid_matching.py
|
114 |
def hybrid_ingredient_matching_voyage(products, ingredients_dict,
|
115 |
embedding_top_n=20, final_top_n=5,
|
116 |
confidence_threshold=0.5,
|
|
|
166 |
model="rerank-2"
|
167 |
)
|
168 |
|
169 |
+
# Process results - include all results but keep the threshold for later filtering
|
170 |
voyage_results = []
|
171 |
for result in reranked["results"]:
|
172 |
score = result["relevance_score"]
|
173 |
+
voyage_results.append((result["document"]["text"], score))
|
|
|
174 |
|
175 |
+
# Still limit to final_top_n but don't filter by threshold here
|
176 |
final_results[product] = voyage_results[:final_top_n]
|
177 |
|
178 |
except Exception as e:
|
|
|
181 |
final_results[product] = candidates[:1]
|
182 |
|
183 |
progress_tracker(1.0, desc="Voyage ingredient matching complete")
|
184 |
+
return final_results
|