Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -143,26 +143,42 @@ def load_embeddings(embeddings_path):
|
|
143 |
print(f"Loaded {len(ingredients_embeddings)} ingredient embeddings")
|
144 |
return ingredients_embeddings
|
145 |
|
146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
"""Categorize products from text input (one product per line)"""
|
|
|
|
|
|
|
|
|
148 |
# Parse input text to get product names
|
149 |
product_names = [line.strip() for line in product_text.split("\n") if line.strip()]
|
150 |
|
151 |
if not product_names:
|
152 |
return "No product names provided."
|
153 |
|
154 |
-
progress(0, desc="Starting...")
|
155 |
-
|
156 |
# Create product embeddings
|
157 |
-
|
158 |
products_embeddings = create_product_embeddings_voyageai(product_names)
|
159 |
|
160 |
# Compute similarities
|
161 |
-
|
162 |
all_similarities = compute_similarities(embeddings, products_embeddings)
|
163 |
|
164 |
# Format results
|
165 |
-
|
166 |
results = {}
|
167 |
for product, similarities in all_similarities.items():
|
168 |
# Filter by confidence threshold and take top N
|
@@ -183,12 +199,14 @@ def categorize_products_from_text(product_text, embeddings, progress=gr.Progress
|
|
183 |
output_text += " No matching categories found.\n"
|
184 |
output_text += "\n"
|
185 |
|
186 |
-
|
187 |
return output_text
|
188 |
|
189 |
-
def categorize_products_from_file(file,
|
190 |
"""Categorize products from a JSON file"""
|
191 |
-
progress
|
|
|
|
|
192 |
|
193 |
try:
|
194 |
with open(file.name, 'r') as f:
|
@@ -215,15 +233,15 @@ def categorize_products_from_file(file, embeddings, progress=gr.Progress(), top_
|
|
215 |
return "No product names found in the file."
|
216 |
|
217 |
# Create product embeddings
|
218 |
-
|
219 |
products_embeddings = create_product_embeddings_voyageai(product_names)
|
220 |
|
221 |
# Compute similarities
|
222 |
-
|
223 |
all_similarities = compute_similarities(embeddings, products_embeddings)
|
224 |
|
225 |
# Format results
|
226 |
-
|
227 |
output_text = f"Found {len(product_names)} products in file.\n\n"
|
228 |
|
229 |
for product, similarities in all_similarities.items():
|
@@ -240,13 +258,28 @@ def categorize_products_from_file(file, embeddings, progress=gr.Progress(), top_
|
|
240 |
output_text += " No matching categories found.\n"
|
241 |
output_text += "\n"
|
242 |
|
243 |
-
|
244 |
return output_text
|
245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
# ===== Gradio Interface Setup =====
|
247 |
def create_interface(embeddings_path="ingredient_embeddings_voyageai.pkl"):
|
248 |
-
#
|
249 |
-
embeddings
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
|
251 |
# Text input interface
|
252 |
with gr.Blocks() as demo:
|
@@ -282,9 +315,7 @@ def create_interface(embeddings_path="ingredient_embeddings_voyageai.pkl"):
|
|
282 |
text_output = gr.Textbox(label="Categorization Results", lines=20)
|
283 |
|
284 |
submit_button.click(
|
285 |
-
fn=
|
286 |
-
text, embeddings, prog, top_n, conf
|
287 |
-
),
|
288 |
inputs=[text_input, top_n, confidence],
|
289 |
outputs=text_output
|
290 |
)
|
@@ -313,9 +344,7 @@ def create_interface(embeddings_path="ingredient_embeddings_voyageai.pkl"):
|
|
313 |
file_output = gr.Textbox(label="Categorization Results", lines=20)
|
314 |
|
315 |
file_button.click(
|
316 |
-
fn=
|
317 |
-
file, embeddings, prog, top_n, conf
|
318 |
-
),
|
319 |
inputs=[file_input, file_top_n, file_confidence],
|
320 |
outputs=file_output
|
321 |
)
|
|
|
143 |
print(f"Loaded {len(ingredients_embeddings)} ingredient embeddings")
|
144 |
return ingredients_embeddings
|
145 |
|
146 |
+
# Define a safe progress tracker that handles None
|
147 |
+
class SafeProgress:
    """Progress reporter that tolerates a missing or failing progress callback.

    Wraps an optional callable that is invoked as ``progress(value, desc=...)``.
    When no callable was supplied, or the callable raises an ordinary
    exception, the update is printed to stdout instead so the caller's work
    is never interrupted by progress reporting.
    """

    def __init__(self, progress_obj=None):
        """Store the wrapped progress callable (``None`` means print only)."""
        self.progress = progress_obj

    def __call__(self, value, desc=""):
        """Report progress ``value`` with an optional description ``desc``.

        Args:
            value: Progress value forwarded unchanged to the wrapped callable.
            desc: Human-readable description of the current step.
        """
        if self.progress is not None:
            try:
                self.progress(value, desc=desc)
                return
            except Exception:
                # Only ordinary errors are downgraded to the print fallback.
                # KeyboardInterrupt / SystemExit derive from BaseException and
                # must propagate so the process can still be stopped.
                pass
        print(f"Progress {value}: {desc}")
|
159 |
+
|
160 |
+
def categorize_products_from_text(product_text, top_n=5, confidence_threshold=0.5, progress=None):
|
161 |
"""Categorize products from text input (one product per line)"""
|
162 |
+
# Create a safe progress tracker
|
163 |
+
progress_tracker = SafeProgress(progress)
|
164 |
+
progress_tracker(0, desc="Starting...")
|
165 |
+
|
166 |
# Parse input text to get product names
|
167 |
product_names = [line.strip() for line in product_text.split("\n") if line.strip()]
|
168 |
|
169 |
if not product_names:
|
170 |
return "No product names provided."
|
171 |
|
|
|
|
|
172 |
# Create product embeddings
|
173 |
+
progress_tracker(0.1, desc="Generating product embeddings...")
|
174 |
products_embeddings = create_product_embeddings_voyageai(product_names)
|
175 |
|
176 |
# Compute similarities
|
177 |
+
progress_tracker(0.6, desc="Computing similarities...")
|
178 |
all_similarities = compute_similarities(embeddings, products_embeddings)
|
179 |
|
180 |
# Format results
|
181 |
+
progress_tracker(0.9, desc="Formatting results...")
|
182 |
results = {}
|
183 |
for product, similarities in all_similarities.items():
|
184 |
# Filter by confidence threshold and take top N
|
|
|
199 |
output_text += " No matching categories found.\n"
|
200 |
output_text += "\n"
|
201 |
|
202 |
+
progress_tracker(1.0, desc="Done!")
|
203 |
return output_text
|
204 |
|
205 |
+
def categorize_products_from_file(file, top_n=5, confidence_threshold=0.5, progress=None):
|
206 |
"""Categorize products from a JSON file"""
|
207 |
+
# Create a safe progress tracker
|
208 |
+
progress_tracker = SafeProgress(progress)
|
209 |
+
progress_tracker(0.1, desc="Reading file...")
|
210 |
|
211 |
try:
|
212 |
with open(file.name, 'r') as f:
|
|
|
233 |
return "No product names found in the file."
|
234 |
|
235 |
# Create product embeddings
|
236 |
+
progress_tracker(0.2, desc="Generating product embeddings...")
|
237 |
products_embeddings = create_product_embeddings_voyageai(product_names)
|
238 |
|
239 |
# Compute similarities
|
240 |
+
progress_tracker(0.7, desc="Computing similarities...")
|
241 |
all_similarities = compute_similarities(embeddings, products_embeddings)
|
242 |
|
243 |
# Format results
|
244 |
+
progress_tracker(0.9, desc="Formatting results...")
|
245 |
output_text = f"Found {len(product_names)} products in file.\n\n"
|
246 |
|
247 |
for product, similarities in all_similarities.items():
|
|
|
258 |
output_text += " No matching categories found.\n"
|
259 |
output_text += "\n"
|
260 |
|
261 |
+
progress_tracker(1.0, desc="Done!")
|
262 |
return output_text
|
263 |
|
264 |
+
# Load embeddings at the module level for easier access
|
265 |
+
# Eagerly load the ingredient embeddings when the module is imported.
# On failure, fall back to an empty dict and only print a warning.
embeddings_path = "ingredient_embeddings_voyageai.pkl"
try:
    embeddings = load_embeddings(embeddings_path)
except Exception as load_error:
    print(f"Warning: Could not load embeddings at startup: {load_error}")
    print("Will attempt to load them when the app runs")
    embeddings = {}
|
272 |
+
|
273 |
# ===== Gradio Interface Setup =====
|
274 |
def create_interface(embeddings_path="ingredient_embeddings_voyageai.pkl"):
|
275 |
+
# Ensure embeddings are loaded
|
276 |
+
global embeddings
|
277 |
+
if not embeddings:
|
278 |
+
try:
|
279 |
+
embeddings = load_embeddings(embeddings_path)
|
280 |
+
except Exception as e:
|
281 |
+
print(f"Error loading embeddings: {e}")
|
282 |
+
gr.Error(f"Failed to load embeddings file: {e}")
|
283 |
|
284 |
# Text input interface
|
285 |
with gr.Blocks() as demo:
|
|
|
315 |
text_output = gr.Textbox(label="Categorization Results", lines=20)
|
316 |
|
317 |
submit_button.click(
|
318 |
+
fn=categorize_products_from_text,
|
|
|
|
|
319 |
inputs=[text_input, top_n, confidence],
|
320 |
outputs=text_output
|
321 |
)
|
|
|
344 |
file_output = gr.Textbox(label="Categorization Results", lines=20)
|
345 |
|
346 |
file_button.click(
|
347 |
+
fn=categorize_products_from_file,
|
|
|
|
|
348 |
inputs=[file_input, file_top_n, file_confidence],
|
349 |
outputs=file_output
|
350 |
)
|