eliago committed on
Commit
e7db3d5
·
verified ·
1 Parent(s): 91ba2a4

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +340 -0
  2. readme.md +43 -0
  3. requirements.txt +3 -0
  4. run_app.sh +14 -0
  5. spaces.py +198 -0
app.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pickle
3
+ import os
4
+ import json
5
+ import numpy as np
6
+ import voyageai
7
+ import time
8
+ import sys
9
+ from concurrent.futures import ThreadPoolExecutor
10
+
11
# Read the Voyage AI API key from the environment rather than committing it to
# source control (on Hugging Face Spaces, define it as a Space secret).
# NOTE(review): the key that was previously hard-coded here has been published
# and should be treated as compromised and rotated.
voyageai.api_key = os.environ.get("VOYAGE_API_KEY")
if not voyageai.api_key:
    print("WARNING: VOYAGE_API_KEY is not set; embedding requests will fail.")

# Request unbuffered output.
# NOTE(review): Python reads PYTHONUNBUFFERED at interpreter start-up, so this
# assignment only influences child processes, not the current one.
os.environ['PYTHONUNBUFFERED'] = '1'
16
+
17
+ # ===== Embedding Generation Functions =====
18
def get_embeddings_batch(texts, model="voyage-3-large", batch_size=100):
    """Get embeddings for a list of texts in batches.

    Args:
        texts: Sequence of strings to embed. Newlines are replaced by spaces
            before the texts are sent to the API.
        model: Name of the Voyage AI embedding model to request.
        batch_size: Maximum number of texts per API request; must be >= 1.

    Returns:
        A list with exactly one entry per input text, in input order. Each
        entry is the embedding returned by the API, or ``None`` when the
        batch containing that text failed.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make range() empty and silently return [].
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Pre-process all texts to replace newlines
    cleaned_texts = [text.replace("\n", " ") for text in texts]
    all_embeddings = []

    for start in range(0, len(cleaned_texts), batch_size):
        batch = cleaned_texts[start:start + batch_size]

        try:
            response = voyageai.Embedding.create(input=batch, model=model)
            batch_embeddings = [item['embedding'] for item in response['data']]
            if len(batch_embeddings) != len(batch):
                # Keep the output aligned with the input: a short (or long)
                # response is treated like any other failed batch.
                raise ValueError(
                    f"expected {len(batch)} embeddings, got {len(batch_embeddings)}"
                )
        except Exception as e:
            # Best-effort: one failing batch must not abort the whole run.
            print(f"Error in batch {start//batch_size + 1}: {e}")
            # Add empty embeddings for failed batch
            all_embeddings.extend([None] * len(batch))
        else:
            all_embeddings.extend(batch_embeddings)
            # Sleep briefly to avoid rate limits (not needed after the last batch)
            if start + batch_size < len(cleaned_texts):
                time.sleep(0.5)

    return all_embeddings
44
+
45
def create_product_embeddings_voyageai(products, batch_size=100):
    """Create embeddings for products using batch processing with deduplication.

    Args:
        products: Sequence of hashable product names; duplicates are allowed
            and are only embedded once.
        batch_size: Number of products per embedding request, forwarded to
            ``get_embeddings_batch``.

    Returns:
        A dict mapping each distinct product name to its embedding, in order
        of first appearance. Products whose embedding came back as ``None``
        are left out. An empty input yields an empty dict.
    """
    # dict.fromkeys keeps the first occurrence of every product, in order.
    unique_products = list(dict.fromkeys(products))

    print(f"Found {len(unique_products)} unique products out of {len(products)} total")

    if not unique_products:
        return {}

    # Process only unique products
    print(f"Processing {len(unique_products)} unique products")
    unique_embeddings = get_embeddings_batch(unique_products, batch_size=batch_size)

    # zip() stops at the shorter sequence, so a short embedding list can never
    # cause an out-of-range lookup.
    all_products_dict = {
        product: embedding
        for product, embedding in zip(unique_products, unique_embeddings)
        if embedding is not None
    }

    print(f"Created embeddings for {len(all_products_dict)} products")

    return all_products_dict
83
+
84
+ # ===== Similarity Computation Functions =====
85
def _unit_rows(vectors):
    """Return *vectors* as a float32 matrix with every row scaled to unit length."""
    matrix = np.array(vectors, dtype=np.float32)
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    # A zero vector has no direction; dividing by 1 keeps it all-zero so its
    # cosine similarity comes out as 0.0 instead of NaN.
    norms[norms == 0] = 1.0
    return matrix / norms


def compute_similarities(ingredients_dict, products_dict):
    """Compute similarities between all products and ingredients using NumPy.

    Args:
        ingredients_dict: Mapping of ingredient name to embedding vector;
            entries whose embedding is ``None`` are ignored.
        products_dict: Mapping of product name to embedding vector; entries
            whose embedding is ``None`` are ignored.

    Returns:
        A dict mapping each product that has an embedding to a list of
        ``(ingredient_name, cosine_similarity)`` tuples sorted by similarity,
        highest first. Returns ``{}`` when no product has an embedding, and
        maps every product to ``[]`` when no ingredient has an embedding.
    """
    valid_ingredients = [(name, emb) for name, emb in ingredients_dict.items() if emb is not None]
    valid_products = [(name, emb) for name, emb in products_dict.items() if emb is not None]

    if not valid_products:
        return {}

    if not valid_ingredients:
        # Nothing to compare against (e.g. the embeddings file failed to
        # load): report "no matches" rather than crashing on an empty matrix.
        return {name: [] for name, _ in valid_products}

    ingredient_names = [name for name, _ in valid_ingredients]
    normalized_ingredients = _unit_rows([emb for _, emb in valid_ingredients])
    normalized_products = _unit_rows([emb for _, emb in valid_products])

    # Compute all similarities at once using matrix multiplication
    # (dot product of normalized vectors = cosine similarity)
    similarity_matrix = np.dot(normalized_products, normalized_ingredients.T)

    all_similarities = {}
    for (product, _), scores in zip(valid_products, similarity_matrix.tolist()):
        # sorted() is stable, so ingredients with equal scores keep their
        # original dictionary order.
        all_similarities[product] = sorted(
            zip(ingredient_names, scores),
            key=lambda pair: pair[1],
            reverse=True,
        )

    return all_similarities
136
+
137
+ # ===== Main Application Functions =====
138
def load_embeddings(embeddings_path):
    """Read the pickled ingredient embeddings stored at ``embeddings_path``.

    Prints the path before loading and the number of loaded entries after.

    NOTE: unpickling can execute arbitrary code, so only point this at
    embedding files from a trusted source.

    Args:
        embeddings_path: Path of the pickle file to open.

    Returns:
        The object that was stored in the pickle file.
    """
    print(f"Loading ingredient embeddings from {embeddings_path}")

    with open(embeddings_path, "rb") as pickle_file:
        loaded = pickle.load(pickle_file)

    entry_count = len(loaded)
    print(f"Loaded {entry_count} ingredient embeddings")
    return loaded
145
+
146
def categorize_products_from_text(product_text, embeddings, progress=gr.Progress(), top_n=5, confidence_threshold=0.5):
    """Categorize the products listed in ``product_text`` (one name per line).

    Args:
        product_text: Raw text; every non-blank line is one product name.
        embeddings: Ingredient embeddings passed to ``compute_similarities``.
        progress: Callable used to report progress as ``progress(fraction, desc=...)``.
        top_n: Maximum number of categories listed per product.
        confidence_threshold: Minimum similarity score for a category to be listed.

    Returns:
        A human-readable report, or a short message when no names were given.
    """
    # Collect the non-blank lines as product names
    product_names = []
    for raw_line in product_text.split("\n"):
        stripped = raw_line.strip()
        if stripped:
            product_names.append(stripped)

    if len(product_names) == 0:
        return "No product names provided."

    progress(0, desc="Starting...")

    # Embed the products
    progress(0.1, desc="Generating product embeddings...")
    products_embeddings = create_product_embeddings_voyageai(product_names)

    # Score every product against every ingredient
    progress(0.6, desc="Computing similarities...")
    all_similarities = compute_similarities(embeddings, products_embeddings)

    # Build the report one product section at a time
    progress(0.9, desc="Formatting results...")
    report_parts = []
    for product, similarities in all_similarities.items():
        # Keep matches at or above the threshold, then cut to the top N
        above_threshold = [(ingredient, score) for ingredient, score in similarities
                           if score >= confidence_threshold]
        categories = above_threshold[:top_n]

        report_parts.append(f"Product: {product}\n")
        if not categories:
            report_parts.append(" No matching categories found.\n")
        else:
            for rank, (category, score) in enumerate(categories, 1):
                report_parts.append(f" {rank}. {category} (confidence: {score:.3f})\n")
        report_parts.append("\n")

    progress(1.0, desc="Done!")
    return "".join(report_parts)
188
+
189
def _parse_product_names(raw_text):
    """Extract product names from the text content of an uploaded file.

    A JSON list of objects contributes each object's ``'name'`` field, any
    other JSON list contributes its truthy items, and any other JSON value
    contributes nothing. Text that is not valid JSON is read as one product
    per line. Names are converted to ``str`` and stripped; empty ones are
    dropped.
    """
    try:
        products_data = json.loads(raw_text)
    except json.JSONDecodeError:
        # If not JSON, treat the content as a text file with one product per line
        return [line.strip() for line in raw_text.split("\n") if line.strip()]

    if not isinstance(products_data, list):
        # Valid JSON that is not a list (object, number, ...) is not supported
        return []

    if all(isinstance(item, dict) for item in products_data):
        candidates = [item.get('name', '') for item in products_data]
    else:
        candidates = products_data

    # Drop missing/empty names so they are never sent to the embedding API,
    # and coerce the rest to str so later text processing cannot fail.
    return [str(item).strip() for item in candidates if item and str(item).strip()]


def categorize_products_from_file(file, embeddings, progress=gr.Progress(), top_n=5, confidence_threshold=0.5):
    """Categorize products read from an uploaded JSON (or plain-text) file.

    Args:
        file: Uploaded file as delivered by the UI: either an object exposing
            the path as ``.name`` or a plain path string. ``None`` means
            nothing was uploaded.
        embeddings: Ingredient embeddings passed to ``compute_similarities``.
        progress: Callable used to report progress as ``progress(fraction, desc=...)``.
        top_n: Maximum number of categories listed per product.
        confidence_threshold: Minimum similarity score for a category to be listed.

    Returns:
        A human-readable report, or a short error/status message when the file
        is missing, unreadable, or contains no product names.
    """
    progress(0.1, desc="Reading file...")

    if file is None:
        return "No file uploaded."

    file_path = getattr(file, "name", file)

    try:
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(file_path, 'r', encoding='utf-8') as f:
            product_names = _parse_product_names(f.read())
    except Exception as e:
        return f"Error reading file: {str(e)}"

    if not product_names:
        return "No product names found in the file."

    # Create product embeddings
    progress(0.2, desc="Generating product embeddings...")
    products_embeddings = create_product_embeddings_voyageai(product_names)

    # Compute similarities
    progress(0.7, desc="Computing similarities...")
    all_similarities = compute_similarities(embeddings, products_embeddings)

    # Format results
    progress(0.9, desc="Formatting results...")
    report_parts = [f"Found {len(product_names)} products in file.\n\n"]

    for product, similarities in all_similarities.items():
        # Filter by confidence threshold and take top N
        filtered_similarities = [(ingredient, score) for ingredient, score in similarities
                                 if score >= confidence_threshold]
        top_similarities = filtered_similarities[:top_n]

        report_parts.append(f"Product: {product}\n")
        if top_similarities:
            for i, (category, score) in enumerate(top_similarities, 1):
                report_parts.append(f" {i}. {category} (confidence: {score:.3f})\n")
        else:
            report_parts.append(" No matching categories found.\n")
        report_parts.append("\n")

    progress(1.0, desc="Done!")
    return "".join(report_parts)
245
+
246
+ # ===== Gradio Interface Setup =====
247
def create_interface(embeddings_path="ingredient_embeddings_voyageai.pkl"):
    """Build the Gradio Blocks UI for the product categorization tool.

    Args:
        embeddings_path: Path of the ingredient embeddings pickle file, loaded
            once here and shared by both tabs.

    Returns:
        The ``gr.Blocks`` app (not yet launched).
    """
    # Load embeddings once at startup
    embeddings = load_embeddings(embeddings_path)

    # Gradio only supplies a progress tracker for a parameter whose default is
    # a gr.Progress instance. The handlers therefore declare it that way
    # instead of as a fourth required argument, which the three listed inputs
    # could never fill.
    def run_text(text, top_n, conf, progress=gr.Progress()):
        return categorize_products_from_text(text, embeddings, progress, top_n, conf)

    def run_file(file, top_n, conf, progress=gr.Progress()):
        return categorize_products_from_file(file, embeddings, progress, top_n, conf)

    with gr.Blocks() as demo:
        gr.Markdown("# Product Categorization Tool")
        gr.Markdown("This tool uses AI to categorize products based on their similarity to known ingredients.")

        with gr.Tabs():
            # Text input interface
            with gr.TabItem("Text Input"):
                with gr.Row():
                    with gr.Column():
                        text_input = gr.Textbox(
                            lines=10,
                            placeholder="Enter product names, one per line",
                            label="Product Names"
                        )
                        top_n = gr.Slider(
                            minimum=1,
                            maximum=10,
                            value=5,
                            step=1,
                            label="Number of Top Categories"
                        )
                        confidence = gr.Slider(
                            minimum=0.1,
                            maximum=0.9,
                            value=0.5,
                            step=0.05,
                            label="Confidence Threshold"
                        )
                        submit_button = gr.Button("Categorize Products")

                    with gr.Column():
                        text_output = gr.Textbox(label="Categorization Results", lines=20)

                submit_button.click(
                    fn=run_text,
                    inputs=[text_input, top_n, confidence],
                    outputs=text_output
                )

            # File upload interface
            with gr.TabItem("File Upload"):
                with gr.Row():
                    with gr.Column():
                        file_input = gr.File(label="Upload JSON file with products")
                        file_top_n = gr.Slider(
                            minimum=1,
                            maximum=10,
                            value=5,
                            step=1,
                            label="Number of Top Categories"
                        )
                        file_confidence = gr.Slider(
                            minimum=0.1,
                            maximum=0.9,
                            value=0.5,
                            step=0.05,
                            label="Confidence Threshold"
                        )
                        file_button = gr.Button("Process File")

                    with gr.Column():
                        file_output = gr.Textbox(label="Categorization Results", lines=20)

                file_button.click(
                    fn=run_file,
                    inputs=[file_input, file_top_n, file_confidence],
                    outputs=file_output
                )

        gr.Markdown("### Example Input")
        gr.Markdown("Try entering product names like:\n- Tomato Sauce\n- Apple Pie\n- Greek Yogurt\n- Chocolate Chip Cookies")

    return demo
327
+
328
if __name__ == "__main__":
    import argparse

    # Command-line options: where the embeddings live and whether to expose
    # a public share link.
    cli = argparse.ArgumentParser(description='Run the Product Categorization web app')
    cli.add_argument(
        '--embeddings',
        default='ingredient_embeddings_voyageai.pkl',
        help='Path to the ingredient embeddings pickle file',
    )
    cli.add_argument(
        '--share',
        action='store_true',
        help='Create a public link for sharing',
    )
    options = cli.parse_args()

    # Build the UI, then start serving it
    demo = create_interface(options.embeddings)
    demo.launch(share=options.share)
readme.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Product Categorization App - One-Click Solution
2
+
3
+ This is a turnkey solution for categorizing products based on their similarity to ingredients using Voyage AI.
4
+
5
+ ## Quick Start
6
+
7
+ 1. Place your `ingredient_embeddings_voyageai.pkl` file in the same folder as this README
8
+ 2. Run the application:
9
+
10
+ ```bash
11
+ bash run_app.sh
12
+ ```
13
+
14
+ 3. That's it! The app starts locally and prints both its local URL and a public share URL; open either one in your browser
15
+
16
+ ## What You Can Do
17
+
18
+ - **Text Input:** Enter product names one per line
19
+ - **File Upload:** Upload a JSON file with product data
20
+ - Adjust the number of categories and confidence threshold
21
+ - View the categorization results with confidence scores
22
+
23
+ ## Hosting on Hugging Face Spaces
24
+
25
+ For permanent, free hosting of this Gradio app on Hugging Face Spaces:
26
+
27
+ 1. Create a free account on [Hugging Face](https://huggingface.co/)
28
+ 2. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
29
+ 3. Click "Create a Space"
30
+ 4. Select "Gradio" as the SDK
31
+ 5. Upload all files (including your embeddings file) to the space
32
+ 6. Your app will be automatically deployed!
33
+
34
+ ## Files Included
35
+
36
+ - `app.py`: The main application code
37
+ - `requirements.txt`: Required Python packages
38
+ - `run_app.sh`: One-click deployment script
39
+
40
+ ## Requirements
41
+
42
+ - Python 3.8+ (required by the pinned `numpy` and `gradio` versions)
43
+ - Internet connection (for Voyage AI API)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ voyageai==0.2.3
2
+ numpy==1.24.3
3
+ gradio==4.12.0
run_app.sh ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# One-click launcher: checks for the embeddings file, installs the Python
# dependencies, then starts the app with a public share link.

# Stop at the first failing command (e.g. a failed pip install) instead of
# launching a half-configured app.
set -euo pipefail

# Work from the directory containing this script so that requirements.txt,
# app.py and the embeddings file are found regardless of the caller's
# working directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

EMBEDDINGS_FILE="ingredient_embeddings_voyageai.pkl"

# Check if embeddings file exists before spending time on installation
if [ ! -f "$EMBEDDINGS_FILE" ]; then
    echo "ERROR: ingredient_embeddings_voyageai.pkl file not found!" >&2
    echo "Please place the embeddings file in the same directory as this script." >&2
    exit 1
fi

# Install required packages
pip install -r requirements.txt

# Run with local embeddings file
python app.py --share
spaces.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pickle
3
+ import os
4
+ import json
5
+ import numpy as np
6
+ import voyageai
7
+ import time
8
+ import sys
9
+
10
# Read the Voyage AI API key from the environment rather than committing it to
# source control (on Hugging Face Spaces, define it as a Space secret).
# NOTE(review): the key that was previously hard-coded here has been published
# and should be treated as compromised and rotated.
voyageai.api_key = os.environ.get("VOYAGE_API_KEY")
if not voyageai.api_key:
    print("WARNING: VOYAGE_API_KEY is not set; embedding requests will fail.")
12
+
13
+ # Import all necessary functions from the main app
14
+ from app import create_product_embeddings_voyageai, get_embeddings_batch, compute_similarities
15
+
16
# Location of the ingredient embeddings pickle used by this Space
EMBEDDINGS_PATH = "ingredient_embeddings_voyageai.pkl"

# Load the ingredient embeddings once at import time. Any failure is reported
# on stdout and replaced by an empty mapping so the module still imports.
print(f"Loading ingredient embeddings from {EMBEDDINGS_PATH}")
try:
    with open(EMBEDDINGS_PATH, "rb") as pickle_file:
        embeddings = pickle.load(pickle_file)
    print(f"Successfully loaded {len(embeddings)} ingredient embeddings")
except Exception as load_error:
    print(f"ERROR: Failed to load embeddings: {load_error}")
    # Fall back to an empty dict
    embeddings = {}
29
+
30
+ # Define the categorization function for text input
31
def categorize_products_from_text(product_text, progress=gr.Progress(), top_n=5, confidence_threshold=0.5):
    """Categorize the products listed in ``product_text`` (one name per line).

    Uses the module-level ``embeddings`` as the ingredient embeddings.

    Args:
        product_text: Raw text; every non-blank line is one product name.
        progress: Callable used to report progress as ``progress(fraction, desc=...)``.
        top_n: Maximum number of categories listed per product.
        confidence_threshold: Minimum similarity score for a category to be listed.

    Returns:
        A human-readable report, or a short message when no names were given.
    """
    # Collect the non-blank lines as product names
    product_names = []
    for raw_line in product_text.split("\n"):
        stripped = raw_line.strip()
        if stripped:
            product_names.append(stripped)

    if len(product_names) == 0:
        return "No product names provided."

    # Embed the products
    progress(0.1, desc="Generating product embeddings...")
    products_embeddings = create_product_embeddings_voyageai(product_names)

    # Score every product against every ingredient
    progress(0.6, desc="Computing similarities...")
    all_similarities = compute_similarities(embeddings, products_embeddings)

    # Build the report one product section at a time
    progress(0.9, desc="Formatting results...")
    report_parts = []
    for product, similarities in all_similarities.items():
        # Keep matches at or above the threshold, then cut to the top N
        above_threshold = [(ingredient, score) for ingredient, score in similarities
                           if score >= confidence_threshold]
        categories = above_threshold[:top_n]

        report_parts.append(f"Product: {product}\n")
        if not categories:
            report_parts.append(" No matching categories found.\n")
        else:
            for rank, (category, score) in enumerate(categories, 1):
                report_parts.append(f" {rank}. {category} (confidence: {score:.3f})\n")
        report_parts.append("\n")

    progress(1.0, desc="Done!")
    return "".join(report_parts)
67
+
68
+ # Define the categorization function for file input
69
def _parse_product_names(raw_text):
    """Extract product names from the text content of an uploaded file.

    A JSON list of objects contributes each object's ``'name'`` field, any
    other JSON list contributes its truthy items, and any other JSON value
    contributes nothing. Text that is not valid JSON is read as one product
    per line. Names are converted to ``str`` and stripped; empty ones are
    dropped.
    """
    try:
        products_data = json.loads(raw_text)
    except json.JSONDecodeError:
        # If not JSON, treat the content as a text file with one product per line
        return [line.strip() for line in raw_text.split("\n") if line.strip()]

    if not isinstance(products_data, list):
        # Valid JSON that is not a list (object, number, ...) is not supported
        return []

    if all(isinstance(item, dict) for item in products_data):
        candidates = [item.get('name', '') for item in products_data]
    else:
        candidates = products_data

    # Drop missing/empty names so they are never sent to the embedding API,
    # and coerce the rest to str so later text processing cannot fail.
    return [str(item).strip() for item in candidates if item and str(item).strip()]


def categorize_products_from_file(file, progress=gr.Progress(), top_n=5, confidence_threshold=0.5):
    """Categorize products read from an uploaded JSON (or plain-text) file.

    Uses the module-level ``embeddings`` as the ingredient embeddings.

    Args:
        file: Uploaded file as delivered by the UI: either an object exposing
            the path as ``.name`` or a plain path string. ``None`` means
            nothing was uploaded.
        progress: Callable used to report progress as ``progress(fraction, desc=...)``.
        top_n: Maximum number of categories listed per product.
        confidence_threshold: Minimum similarity score for a category to be listed.

    Returns:
        A human-readable report, or a short error/status message when the file
        is missing, unreadable, or contains no product names.
    """
    progress(0.1, desc="Reading file...")

    if file is None:
        return "No file uploaded."

    file_path = getattr(file, "name", file)

    try:
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(file_path, 'r', encoding='utf-8') as f:
            product_names = _parse_product_names(f.read())
    except Exception as e:
        return f"Error reading file: {str(e)}"

    if not product_names:
        return "No product names found in the file."

    # Create product embeddings
    progress(0.2, desc="Generating product embeddings...")
    products_embeddings = create_product_embeddings_voyageai(product_names)

    # Compute similarities
    progress(0.7, desc="Computing similarities...")
    all_similarities = compute_similarities(embeddings, products_embeddings)

    # Format results
    progress(0.9, desc="Formatting results...")
    report_parts = [f"Found {len(product_names)} products in file.\n\n"]

    for product, similarities in all_similarities.items():
        # Filter by confidence threshold and take top N
        filtered_similarities = [(ingredient, score) for ingredient, score in similarities
                                 if score >= confidence_threshold]
        top_similarities = filtered_similarities[:top_n]

        report_parts.append(f"Product: {product}\n")
        if top_similarities:
            for i, (category, score) in enumerate(top_similarities, 1):
                report_parts.append(f" {i}. {category} (confidence: {score:.3f})\n")
        else:
            report_parts.append(" No matching categories found.\n")
        report_parts.append("\n")

    progress(1.0, desc="Done!")
    return "".join(report_parts)
125
+
126
+ # Create the Gradio interface
127
# Build the two-tab UI. Components are laid out in the order they are created
# inside the nested context managers, so statement order matters here.
with gr.Blocks() as demo:
    gr.Markdown("# Product Categorization Tool")
    gr.Markdown("This tool uses AI to categorize products based on their similarity to known ingredients.")

    with gr.Tabs():
        # Tab 1: product names typed or pasted, one per line
        with gr.TabItem("Text Input"):
            with gr.Row():
                # Left column: inputs and the trigger button
                with gr.Column():
                    text_input = gr.Textbox(
                        lines=10,
                        placeholder="Enter product names, one per line",
                        label="Product Names"
                    )
                    top_n = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=5,
                        step=1,
                        label="Number of Top Categories"
                    )
                    confidence = gr.Slider(
                        minimum=0.1,
                        maximum=0.9,
                        value=0.5,
                        step=0.05,
                        label="Confidence Threshold"
                    )
                    submit_button = gr.Button("Categorize Products")

                # Right column: the formatted report
                with gr.Column():
                    text_output = gr.Textbox(label="Categorization Results", lines=20)

            # Only the three UI inputs are listed; the handler's `progress`
            # parameter has a gr.Progress() default and is not a listed input.
            submit_button.click(
                fn=categorize_products_from_text,
                inputs=[text_input, top_n, confidence],
                outputs=text_output
            )

        # Tab 2: product names read from an uploaded file
        with gr.TabItem("File Upload"):
            with gr.Row():
                # Left column: upload widget, sliders and the trigger button
                with gr.Column():
                    file_input = gr.File(label="Upload JSON file with products")
                    file_top_n = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=5,
                        step=1,
                        label="Number of Top Categories"
                    )
                    file_confidence = gr.Slider(
                        minimum=0.1,
                        maximum=0.9,
                        value=0.5,
                        step=0.05,
                        label="Confidence Threshold"
                    )
                    file_button = gr.Button("Process File")

                # Right column: the formatted report
                with gr.Column():
                    file_output = gr.Textbox(label="Categorization Results", lines=20)

            file_button.click(
                fn=categorize_products_from_file,
                inputs=[file_input, file_top_n, file_confidence],
                outputs=file_output
            )

    # Static usage hints
    # NOTE(review): the diff view lost indentation; these are assumed to sit
    # directly inside the Blocks context, after the tabs - confirm.
    gr.Markdown("### Example Input")
    gr.Markdown("Try entering product names like:\n- Tomato Sauce\n- Apple Pie\n- Greek Yogurt\n- Chocolate Chip Cookies")

# Launch the demo (for Hugging Face Spaces)
demo.launch()