fixed bug
Files changed:
- ui_expanded_matching.py (+123 -163)
- ui_ingredient_matching.py (+12 -11)
ui_expanded_matching.py
CHANGED
Before (removed lines are prefixed with "-"):

@@ -8,9 +8,10 @@ from ui_formatters import format_reranking_results_html
 from api_utils import get_openai_client, process_in_parallel, rank_ingredients_openai, rank_categories_openai
 from category_matching import load_categories, load_category_embeddings
 import json

 def categorize_products_with_openai_reranking(product_input, is_file=False, use_expansion=False,
-                                              embedding_top_n=20, top_n=10, confidence_threshold=0.5,
                                               match_type="ingredients"):  # Removed progress parameter
     """
     Categorize products using OpenAI reranking with optional description expansion

@@ -22,133 +23,119 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, use_
     product_names, error = parse_input(product_input, is_file)
     if error:
         return error
     # Validate embeddings are loaded if doing ingredient matching
     if match_type == "ingredients" and not embeddings:
         return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No ingredient embeddings loaded. Please check that the embeddings file exists and is properly formatted.</div>"
     # Optional description expansion
     expanded_descriptions = {}
     if use_expansion:
         # progress_tracker(0.2, desc="Expanding product descriptions...")  # Removed progress
     # Get shared OpenAI client
     openai_client = get_openai_client()
-    products_for_embedding = ''
-    all_similarities = compute_similarities(embeddings, product_embeddings)
-    print(f"product_names: {product_names}")
-    print(f"products_for_embedding: {products_for_embedding}")
-    # print(f"all_similarities: {all_similarities}")

     if not all_similarities:
         return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No similarities found. Please try different product names.</div>"

         candidates = all_similarities[product][:embedding_top_n]
-        if not candidates:
-            return product, []
         candidate_ingredients = [c[0] for c in candidates]
         expanded_text = expanded_descriptions.get(product, product) if use_expansion else product
         try:
-            # Use the shared utility function - now passing 0.0 as threshold to get all results
-            # We'll apply the threshold at display time
             reranked_ingredients = rank_ingredients_openai(
-                product=product,
-                client=openai_client,
-                model="gpt-4o-mini",
-                max_results=top_n,
-                confidence_threshold=0.0,  # Don't filter here, do it at display time
-                debug=True
             )
             return product, reranked_ingredients
         except Exception as e:
-            print(f"Error reranking {product}: {e}")

     # Process all products in parallel
     final_results = process_in_parallel(
-        items=product_names,

-    else:  # categories
-        # Load category embeddings instead of JSON categories
-        # progress_tracker(0.5, desc="Loading category embeddings...")  # Removed progress
-        category_embeddings = load_category_embeddings()
-        if not category_embeddings:
-            return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No category embeddings found. Please check that the embeddings file exists at data/category_embeddings.pickle.</div>"
-        # Generate product embeddings
-        # progress_tracker(0.6, desc="Generating product embeddings...")  # Removed progress
-        if use_expansion and expanded_descriptions:
-            # Use expanded descriptions for embedding creation when available
-            products_for_embedding = [expanded_descriptions.get(name, name) for name in product_names]
-            # Map expanded descriptions back to original product names for consistent keys
-            product_embeddings = {}
-            temp_embeddings = create_product_embeddings(products_for_embedding, original_products=product_names)  # Removed progress, pass original names
-            # Ensure we use original product names as keys
-            for i, product_name in enumerate(product_names):
-                if i < len(products_for_embedding) and products_for_embedding[i] in temp_embeddings:
-                    product_embeddings[product_name] = temp_embeddings[products_for_embedding[i]]
-        else:
-            # Standard embedding creation with just product names
-            product_embeddings = create_product_embeddings(product_names)  # Removed progress
-        # Compute embedding similarities for categories
-        # progress_tracker(0.7, desc="Computing category similarities...")  # Removed progress
-        all_similarities = compute_similarities(category_embeddings, product_embeddings)
-        if not all_similarities:
-            return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No category similarities found. Please try different product names.</div>"
-        # Collect all needed category IDs first - don't filter by threshold here
         needed_category_ids = set()
         for product, similarities in all_similarities.items():
             for category_id, score in similarities[:embedding_top_n]:
                 needed_category_ids.add(category_id)
-        # Load only the needed categories from JSON
-        # progress_tracker(0.75, desc="Loading category descriptions...")  # Removed progress
         category_descriptions = {}
         if needed_category_ids:
             try:

@@ -158,101 +145,74 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, use_
                     if item["id"] in needed_category_ids:
                         category_descriptions[item["id"]] = item["text"]
             except Exception as e:
-                print(f"Error loading category descriptions: {e}")
-        # Function to process each product
-        def process_category_matching(product):
-            if product not in all_similarities:
-                return product, []
-            candidates = all_similarities[product][:embedding_top_n]

             expanded_text = expanded_descriptions.get(product, product) if use_expansion else product
             try:
-                # FIXED: Filter categories to only include those in the current product's candidates
-                product_category_ids = [cat_id for cat_id, _ in candidates]
-                filtered_categories = {cat_id: category_descriptions[cat_id]
-                                       for cat_id in product_category_ids
-                                       if cat_id in category_descriptions}
-                # Pass 0.0 as threshold to get all results - apply threshold at display time
                 category_matches = rank_categories_openai(
-                    product=product,
-                    client=openai_client,
-                    model="gpt-4o-mini",
-                    max_results=top_n,
-                    confidence_threshold=0.0,  # Don't filter here
-                    debug=True
                 )
-                # Format results with category descriptions if needed
                 formatted_matches = []
                 for category_id, score in category_matches:
                     category_text = category_descriptions.get(category_id, "Unknown category")
                     formatted_matches.append((category_id, category_text, score))
                 return product, formatted_matches
             except Exception as e:

         # Process all products in parallel
         final_results = process_in_parallel(
-            items=product_names,

-        # Format results
-        # progress_tracker(0.9, desc="Formatting results...")  # Removed progress
-        # Create a list of result dictionaries in consistent format
     formatted_results = []
     for product, matches in final_results.items():
-        # Include all products, even with no matches
         formatted_result = {
             "product_name": product,
             "confidence": max([item[-1] for item in matches]) if matches else 0,
             "matching_items": [],
-            "item_scores": [],
             "explanation": expanded_descriptions.get(product, "") if use_expansion else ""
         }
-        # Format matching items based on match type
         if match_type == "ingredients":
             formatted_result["matching_items"] = [item for item, score in matches]
             formatted_result["item_scores"] = [score for item, score in matches]
-        else:
             for cat_id, cat_desc, score in matches:
-                formatted_result["matching_items"].append(
-                    f"{cat_id}: {cat_desc}" if cat_desc else f"{cat_id}"
-                )
                 formatted_result["item_scores"].append(score)
         formatted_results.append(formatted_result)
     if not formatted_results:
-        return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found
     result_html = format_reranking_results_html(
         results=formatted_results,
         match_type=match_type,
         show_scores=True,
         include_explanation=use_expansion,
         method="openai",
-        confidence_threshold=confidence_threshold
     )
-    # progress_tracker(1.0, desc="Done!")  # Removed progress
     return result_html
After (new version of the changed regions):

@@ -8,9 +8,10 @@ from ui_formatters import format_reranking_results_html
from api_utils import get_openai_client, process_in_parallel, rank_ingredients_openai, rank_categories_openai
from category_matching import load_categories, load_category_embeddings
import json
import traceback  # Import traceback for detailed error logging

def categorize_products_with_openai_reranking(product_input, is_file=False, use_expansion=False,
                                              embedding_top_n=20, top_n=10, confidence_threshold=0.5,
                                              match_type="ingredients"):  # Removed progress parameter
    """
    Categorize products using OpenAI reranking with optional description expansion

@@ -22,133 +23,119 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, use_
    product_names, error = parse_input(product_input, is_file)
    if error:
        return error

    # Validate embeddings are loaded if doing ingredient matching
    if match_type == "ingredients" and not embeddings:
        return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No ingredient embeddings loaded. Please check that the embeddings file exists and is properly formatted.</div>"

    # Optional description expansion
    expanded_descriptions = {}
    if use_expansion:
        # progress_tracker(0.2, desc="Expanding product descriptions...")  # Removed progress
        try:
            expanded_descriptions = expand_product_descriptions(product_names)  # Removed progress argument
        except Exception as e:
            print(f"ERROR during description expansion: {e}")
            print(traceback.format_exc())
            return f"<div style='color: red;'>Error during description expansion: {e}</div>"

    # Get shared OpenAI client
    openai_client = get_openai_client()

    product_embeddings = {}  # Initialize here for broader scope
    all_similarities = {}  # Initialize here

    try:  # Wrap embedding generation and similarity computation
        if match_type == "ingredients":
            # --- Ingredient Matching Logic ---
            # Generate product embeddings
            if use_expansion and expanded_descriptions:
                products_for_embedding = [expanded_descriptions.get(name, name) for name in product_names]
                temp_embeddings = create_product_embeddings(products_for_embedding, original_products=product_names)
                # Correctly map using original product names as keys
                for product_name in product_names:
                    if product_name in temp_embeddings:
                        product_embeddings[product_name] = temp_embeddings[product_name]
            else:
                product_embeddings = create_product_embeddings(product_names)

            # Check if embeddings were successfully generated/mapped
            if not product_embeddings:
                return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: Failed to generate product embeddings for ingredients. Please try again.</div>"

            # Compute embedding similarities for ingredients
            all_similarities = compute_similarities(embeddings, product_embeddings)

        else:  # categories
            # --- Category Matching Logic ---
            category_embeddings = load_category_embeddings()
            if not category_embeddings:
                return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No category embeddings found. Please check 'data/category_embeddings.pickle'.</div>"

            # Generate product embeddings
            if use_expansion and expanded_descriptions:
                products_for_embedding = [expanded_descriptions.get(name, name) for name in product_names]
                temp_embeddings = create_product_embeddings(products_for_embedding, original_products=product_names)
                # Correctly map using original product names as keys
                for product_name in product_names:
                    if product_name in temp_embeddings:
                        product_embeddings[product_name] = temp_embeddings[product_name]
            else:
                product_embeddings = create_product_embeddings(product_names)

            # Check if embeddings were successfully generated/mapped
            if not product_embeddings:
                return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: Failed to generate product embeddings for categories. Please try again.</div>"

            # Compute embedding similarities for categories
            all_similarities = compute_similarities(category_embeddings, product_embeddings)

        # --- Common Logic Post Similarity ---
        if not all_similarities:
            # This check might be redundant if product_embeddings check catches the issue earlier, but keep for safety
            return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No similarities found. Please try different product names.</div>"

    except Exception as e:  # Catch errors during embedding/similarity
        print(f"ERROR during embedding generation or similarity computation: {e}")
        print(traceback.format_exc())
        return f"<div style='color: red;'>Error during processing: {e}<br><pre>{traceback.format_exc()}</pre></div>"

    # --- Reranking Logic ---
    final_results = {}
    if match_type == "ingredients":
        # Function for processing each product (Ingredients)
        def process_reranking_ingredients(product):
            if product not in all_similarities: return product, []
            candidates = all_similarities[product][:embedding_top_n]
            if not candidates: return product, []
            candidate_ingredients = [c[0] for c in candidates]
            expanded_text = expanded_descriptions.get(product, product) if use_expansion else product
            try:
                reranked_ingredients = rank_ingredients_openai(
                    product=product, candidates=candidate_ingredients, expanded_description=expanded_text,
                    client=openai_client, model="gpt-4o-mini", max_results=top_n,
                    confidence_threshold=0.0, debug=True
                )
                return product, reranked_ingredients
            except Exception as e:
                print(f"Error reranking ingredients for {product}: {e}")
                return product, candidates[:1]  # Fallback

        # Process all products in parallel
        final_results = process_in_parallel(
            items=product_names, processor_func=process_reranking_ingredients,
            max_workers=min(10, len(product_names))
        )

    else:  # categories
        # Load category descriptions needed for reranking
        needed_category_ids = set()
        for product, similarities in all_similarities.items():
            for category_id, score in similarities[:embedding_top_n]:
                needed_category_ids.add(category_id)

        category_descriptions = {}
        if needed_category_ids:
            try:

@@ -158,101 +145,74 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, use_
                    if item["id"] in needed_category_ids:
                        category_descriptions[item["id"]] = item["text"]
            except Exception as e:
                print(f"Error loading category descriptions: {e}")  # Non-fatal, continue without descriptions

        # Function to process each product (Categories)
        def process_reranking_categories(product):
            if product not in all_similarities: return product, []
            candidates = all_similarities[product][:embedding_top_n]
            if not candidates: return product, []
            product_category_ids = [cat_id for cat_id, _ in candidates]
            filtered_categories = {cat_id: category_descriptions.get(cat_id, f"Category {cat_id}")  # Use get with fallback
                                   for cat_id in product_category_ids}
            expanded_text = expanded_descriptions.get(product, product) if use_expansion else product
            try:
                category_matches = rank_categories_openai(
                    product=product, categories=filtered_categories, expanded_description=expanded_text,
                    client=openai_client, model="gpt-4o-mini", max_results=top_n,
                    confidence_threshold=0.0, debug=True
                )
                # Format results with category descriptions
                formatted_matches = []
                for category_id, score in category_matches:
                    category_text = category_descriptions.get(category_id, "Unknown category")
                    formatted_matches.append((category_id, category_text, score))
                return product, formatted_matches
            except Exception as e:
                print(f"Error reranking categories for {product}: {e}")
                # Fallback: Format top embedding candidates (without reranking score)
                fallback_matches = []
                for cat_id, score in candidates[:1]:  # Take top 1 embedding match as fallback
                    category_text = category_descriptions.get(cat_id, "Unknown category")
                    fallback_matches.append((cat_id, category_text, score))  # Use embedding score
                return product, fallback_matches

        # Process all products in parallel
        final_results = process_in_parallel(
            items=product_names, processor_func=process_reranking_categories,
            max_workers=min(10, len(product_names))
        )

    # --- Format final results ---
    formatted_results = []
    for product, matches in final_results.items():
        formatted_result = {
            "product_name": product,
            "confidence": max([item[-1] for item in matches]) if matches else 0,
            "matching_items": [],
            "item_scores": [],
            "explanation": expanded_descriptions.get(product, "") if use_expansion else ""
        }
        if match_type == "ingredients":
            formatted_result["matching_items"] = [item for item, score in matches]
            formatted_result["item_scores"] = [score for item, score in matches]
        else:  # categories
            for cat_id, cat_desc, score in matches:
                formatted_result["matching_items"].append(f"{cat_id}: {cat_desc}")
                formatted_result["item_scores"].append(score)
        formatted_results.append(formatted_result)

    if not formatted_results:
        return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found after processing.</div>"

    result_html = format_reranking_results_html(
        results=formatted_results,
        match_type=match_type,
        show_scores=True,
        include_explanation=use_expansion,
        method="openai",
        confidence_threshold=confidence_threshold
    )

    return result_html
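The reranking branches above rely on process_in_parallel returning a dict keyed by product name, assembled from the (product, matches) tuples that each process_reranking_* function returns. api_utils is not part of this commit, so the real helper may differ; the following is only a minimal sketch of that assumed contract, using a thread pool:

# Hypothetical sketch; the real process_in_parallel lives in api_utils and may differ.
from concurrent.futures import ThreadPoolExecutor

def process_in_parallel(items, processor_func, max_workers=10):
    # Each processor_func(item) is assumed to return a (key, value) pair;
    # collect the pairs into a dict so callers can iterate .items().
    results = {}
    with ThreadPoolExecutor(max_workers=max(1, max_workers)) as executor:
        for key, value in executor.map(processor_func, items):
            results[key] = value
    return results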
ui_ingredient_matching.py
CHANGED
Before (removed lines are prefixed with "-"):

@@ -35,15 +35,17 @@ def categorize_products(product_input, is_file=False, use_expansion=False, top_n
         # Map expanded descriptions back to original product names for consistent keys
         products_embeddings = {}
         temp_embeddings = create_product_embeddings(products_for_embedding, original_products=product_names)  # Removed progress, pass original names for keys
         # Ensure we use original product names as keys
     else:
         # Standard embedding creation with just product names
         products_embeddings = create_product_embeddings(product_names)  # Removed progress
     if not products_embeddings:
         return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: Failed to generate product embeddings. Please try again with different product names.</div>"

@@ -58,19 +60,19 @@ def categorize_products(product_input, is_file=False, use_expansion=False, top_n
     # Format results
     progress_tracker(0.9, desc="Formatting results...")
     output_html = f"<p style='color: #555;'>Processing {len(product_names)} products.</p>"
     for product, similarities in all_similarities.items():
         filtered_similarities = [(ingredient, score) for ingredient, score in similarities if score >= confidence_threshold]
         top_similarities = filtered_similarities[:int(top_n)]
         # Add expansion explanation if available
         expansion_text = expanded_descriptions.get(product, "") if use_expansion else ""
         # Debug info for Chicory results
         chicory_data = chicory_results.get(product, [])
         output_html += format_categories_html(
-            product,
-            top_similarities,
             chicory_result=chicory_data,
             explanation=expansion_text,
             match_type="ingredients",

@@ -83,4 +85,3 @@ def categorize_products(product_input, is_file=False, use_expansion=False, top_n

     progress_tracker(1.0, desc="Done!")
     return output_html  # Return the generated HTML directly
After (new version of the changed regions):

@@ -35,15 +35,17 @@ def categorize_products(product_input, is_file=False, use_expansion=False, top_n
        # Map expanded descriptions back to original product names for consistent keys
        products_embeddings = {}
        temp_embeddings = create_product_embeddings(products_for_embedding, original_products=product_names)  # Removed progress, pass original names for keys

        # Ensure we use original product names as keys
        # Corrected loop: Iterate through original names and use them as keys
        for product_name in product_names:
            # Check if the original product name exists as a key in the returned embeddings
            if product_name in temp_embeddings:
                products_embeddings[product_name] = temp_embeddings[product_name]
    else:
        # Standard embedding creation with just product names
        products_embeddings = create_product_embeddings(product_names)  # Removed progress

    if not products_embeddings:
        return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: Failed to generate product embeddings. Please try again with different product names.</div>"

@@ -58,19 +60,19 @@ def categorize_products(product_input, is_file=False, use_expansion=False, top_n
    # Format results
    progress_tracker(0.9, desc="Formatting results...")
    output_html = f"<p style='color: #555;'>Processing {len(product_names)} products.</p>"

    for product, similarities in all_similarities.items():
        filtered_similarities = [(ingredient, score) for ingredient, score in similarities if score >= confidence_threshold]
        top_similarities = filtered_similarities[:int(top_n)]

        # Add expansion explanation if available
        expansion_text = expanded_descriptions.get(product, "") if use_expansion else ""

        # Debug info for Chicory results
        chicory_data = chicory_results.get(product, [])
        output_html += format_categories_html(
            product,
            top_similarities,
            chicory_result=chicory_data,
            explanation=expansion_text,
            match_type="ingredients",

@@ -83,4 +85,3 @@ def categorize_products(product_input, is_file=False, use_expansion=False, top_n

    progress_tracker(1.0, desc="Done!")
    return output_html  # Return the generated HTML directly
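The "fixed bug" in both files is this key mapping: the old loop (visible in the Before section of ui_expanded_matching.py) looked up the expanded text products_for_embedding[i] in temp_embeddings, but when create_product_embeddings is called with original_products it is assumed to key its result by the original names, so those lookups missed and the embeddings dict could end up empty. A toy illustration of the difference, with made-up names and vectors:

# Made-up data; create_product_embeddings is assumed to key its output by the
# original product names when original_products is passed.
product_names = ["oat milk", "chicken broth"]
products_for_embedding = ["oat milk: a plant-based milk...", "chicken broth: a savory stock..."]
temp_embeddings = {"oat milk": [0.12, 0.98], "chicken broth": [0.45, 0.31]}  # stand-in for the real call

# Old lookup, keyed by the expanded text: nothing matches, so the dict stays empty.
old = {name: temp_embeddings[products_for_embedding[i]]
       for i, name in enumerate(product_names)
       if products_for_embedding[i] in temp_embeddings}
assert old == {}

# Corrected lookup, keyed by the original names, as in the new loops above.
new = {name: temp_embeddings[name] for name in product_names if name in temp_embeddings}
assert set(new) == {"oat milk", "chicken broth"}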