eliago committed on
Commit
a198898
·
verified ·
1 Parent(s): 3e3d964

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -21
app.py CHANGED
@@ -143,26 +143,42 @@ def load_embeddings(embeddings_path):
143
  print(f"Loaded {len(ingredients_embeddings)} ingredient embeddings")
144
  return ingredients_embeddings
145
 
146
- def categorize_products_from_text(product_text, embeddings, progress=gr.Progress(), top_n=5, confidence_threshold=0.5):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  """Categorize products from text input (one product per line)"""
 
 
 
 
148
  # Parse input text to get product names
149
  product_names = [line.strip() for line in product_text.split("\n") if line.strip()]
150
 
151
  if not product_names:
152
  return "No product names provided."
153
 
154
- progress(0, desc="Starting...")
155
-
156
  # Create product embeddings
157
- progress(0.1, desc="Generating product embeddings...")
158
  products_embeddings = create_product_embeddings_voyageai(product_names)
159
 
160
  # Compute similarities
161
- progress(0.6, desc="Computing similarities...")
162
  all_similarities = compute_similarities(embeddings, products_embeddings)
163
 
164
  # Format results
165
- progress(0.9, desc="Formatting results...")
166
  results = {}
167
  for product, similarities in all_similarities.items():
168
  # Filter by confidence threshold and take top N
@@ -183,12 +199,14 @@ def categorize_products_from_text(product_text, embeddings, progress=gr.Progress
183
  output_text += " No matching categories found.\n"
184
  output_text += "\n"
185
 
186
- progress(1.0, desc="Done!")
187
  return output_text
188
 
189
- def categorize_products_from_file(file, embeddings, progress=gr.Progress(), top_n=5, confidence_threshold=0.5):
190
  """Categorize products from a JSON file"""
191
- progress(0.1, desc="Reading file...")
 
 
192
 
193
  try:
194
  with open(file.name, 'r') as f:
@@ -215,15 +233,15 @@ def categorize_products_from_file(file, embeddings, progress=gr.Progress(), top_
215
  return "No product names found in the file."
216
 
217
  # Create product embeddings
218
- progress(0.2, desc="Generating product embeddings...")
219
  products_embeddings = create_product_embeddings_voyageai(product_names)
220
 
221
  # Compute similarities
222
- progress(0.7, desc="Computing similarities...")
223
  all_similarities = compute_similarities(embeddings, products_embeddings)
224
 
225
  # Format results
226
- progress(0.9, desc="Formatting results...")
227
  output_text = f"Found {len(product_names)} products in file.\n\n"
228
 
229
  for product, similarities in all_similarities.items():
@@ -240,13 +258,28 @@ def categorize_products_from_file(file, embeddings, progress=gr.Progress(), top_
240
  output_text += " No matching categories found.\n"
241
  output_text += "\n"
242
 
243
- progress(1.0, desc="Done!")
244
  return output_text
245
 
 
 
 
 
 
 
 
 
 
246
  # ===== Gradio Interface Setup =====
247
  def create_interface(embeddings_path="ingredient_embeddings_voyageai.pkl"):
248
- # Load embeddings once at startup
249
- embeddings = load_embeddings(embeddings_path)
 
 
 
 
 
 
250
 
251
  # Text input interface
252
  with gr.Blocks() as demo:
@@ -282,9 +315,7 @@ def create_interface(embeddings_path="ingredient_embeddings_voyageai.pkl"):
282
  text_output = gr.Textbox(label="Categorization Results", lines=20)
283
 
284
  submit_button.click(
285
- fn=lambda text, top_n, conf, prog: categorize_products_from_text(
286
- text, embeddings, prog, top_n, conf
287
- ),
288
  inputs=[text_input, top_n, confidence],
289
  outputs=text_output
290
  )
@@ -313,9 +344,7 @@ def create_interface(embeddings_path="ingredient_embeddings_voyageai.pkl"):
313
  file_output = gr.Textbox(label="Categorization Results", lines=20)
314
 
315
  file_button.click(
316
- fn=lambda file, top_n, conf, prog: categorize_products_from_file(
317
- file, embeddings, prog, top_n, conf
318
- ),
319
  inputs=[file_input, file_top_n, file_confidence],
320
  outputs=file_output
321
  )
 
143
  print(f"Loaded {len(ingredients_embeddings)} ingredient embeddings")
144
  return ingredients_embeddings
145
 
146
# Define a safe progress tracker that handles None
class SafeProgress:
    """Progress reporter that tolerates a missing or failing progress callback.

    Wraps an optional callable that is invoked as
    ``progress_obj(value, desc=...)``. When no callable was supplied, or the
    callable raises an ordinary exception, the update is printed to stdout
    instead so the surrounding work is never interrupted by progress reporting.
    """

    def __init__(self, progress_obj=None):
        """Store the optional progress callback.

        Args:
            progress_obj: Callable accepting ``(value, desc=...)``, or ``None``
                to always print updates to stdout.
        """
        self.progress = progress_obj

    def __call__(self, value, desc=""):
        """Report one progress update.

        Args:
            value: Progress value forwarded unchanged to the callback.
            desc: Human-readable description of the current step.
        """
        if self.progress is None:
            self._print_update(value, desc)
            return
        try:
            self.progress(value, desc=desc)
        except Exception:
            # Best-effort reporting: a broken callback must not abort the
            # caller. Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still propagate instead of being silently swallowed.
            self._print_update(value, desc)

    @staticmethod
    def _print_update(value, desc):
        """Print the fallback progress line to stdout."""
        print(f"Progress {value}: {desc}")
159
+
160
+ def categorize_products_from_text(product_text, top_n=5, confidence_threshold=0.5, progress=None):
161
  """Categorize products from text input (one product per line)"""
162
+ # Create a safe progress tracker
163
+ progress_tracker = SafeProgress(progress)
164
+ progress_tracker(0, desc="Starting...")
165
+
166
  # Parse input text to get product names
167
  product_names = [line.strip() for line in product_text.split("\n") if line.strip()]
168
 
169
  if not product_names:
170
  return "No product names provided."
171
 
 
 
172
  # Create product embeddings
173
+ progress_tracker(0.1, desc="Generating product embeddings...")
174
  products_embeddings = create_product_embeddings_voyageai(product_names)
175
 
176
  # Compute similarities
177
+ progress_tracker(0.6, desc="Computing similarities...")
178
  all_similarities = compute_similarities(embeddings, products_embeddings)
179
 
180
  # Format results
181
+ progress_tracker(0.9, desc="Formatting results...")
182
  results = {}
183
  for product, similarities in all_similarities.items():
184
  # Filter by confidence threshold and take top N
 
199
  output_text += " No matching categories found.\n"
200
  output_text += "\n"
201
 
202
+ progress_tracker(1.0, desc="Done!")
203
  return output_text
204
 
205
+ def categorize_products_from_file(file, top_n=5, confidence_threshold=0.5, progress=None):
206
  """Categorize products from a JSON file"""
207
+ # Create a safe progress tracker
208
+ progress_tracker = SafeProgress(progress)
209
+ progress_tracker(0.1, desc="Reading file...")
210
 
211
  try:
212
  with open(file.name, 'r') as f:
 
233
  return "No product names found in the file."
234
 
235
  # Create product embeddings
236
+ progress_tracker(0.2, desc="Generating product embeddings...")
237
  products_embeddings = create_product_embeddings_voyageai(product_names)
238
 
239
  # Compute similarities
240
+ progress_tracker(0.7, desc="Computing similarities...")
241
  all_similarities = compute_similarities(embeddings, products_embeddings)
242
 
243
  # Format results
244
+ progress_tracker(0.9, desc="Formatting results...")
245
  output_text = f"Found {len(product_names)} products in file.\n\n"
246
 
247
  for product, similarities in all_similarities.items():
 
258
  output_text += " No matching categories found.\n"
259
  output_text += "\n"
260
 
261
+ progress_tracker(1.0, desc="Done!")
262
  return output_text
263
 
264
# Load the ingredient embeddings once at import time so the handlers can read
# them as a module-level global. A failure here is not fatal: the error is
# reported and an empty mapping is left in place as a fallback.
embeddings_path = "ingredient_embeddings_voyageai.pkl"
try:
    embeddings = load_embeddings(embeddings_path)
except Exception as load_error:
    print(f"Warning: Could not load embeddings at startup: {load_error}")
    print("Will attempt to load them when the app runs")
    embeddings = {}
272
+
273
  # ===== Gradio Interface Setup =====
274
  def create_interface(embeddings_path="ingredient_embeddings_voyageai.pkl"):
275
+ # Ensure embeddings are loaded
276
+ global embeddings
277
+ if not embeddings:
278
+ try:
279
+ embeddings = load_embeddings(embeddings_path)
280
+ except Exception as e:
281
+ print(f"Error loading embeddings: {e}")
282
+ gr.Error(f"Failed to load embeddings file: {e}")
283
 
284
  # Text input interface
285
  with gr.Blocks() as demo:
 
315
  text_output = gr.Textbox(label="Categorization Results", lines=20)
316
 
317
  submit_button.click(
318
+ fn=categorize_products_from_text,
 
 
319
  inputs=[text_input, top_n, confidence],
320
  outputs=text_output
321
  )
 
344
  file_output = gr.Textbox(label="Categorization Results", lines=20)
345
 
346
  file_button.click(
347
+ fn=categorize_products_from_file,
 
 
348
  inputs=[file_input, file_top_n, file_confidence],
349
  outputs=file_output
350
  )