esilver committed on
Commit
e314c06
·
1 Parent(s): 0ce15ef

More bug fixes

Browse files
Files changed (2) hide show
  1. category_matching.py +7 -5
  2. ui_hybrid_matching.py +2 -0
category_matching.py CHANGED
@@ -154,11 +154,8 @@ def match_products_to_categories(product_names: List[str], categories: Dict[str,
154
  progress_tracker(0.8, desc="Processing results")
155
 
156
  for product, product_similarities in similarities.items():
157
- # Filter by threshold and take top N
158
- filtered_categories = [(category_id, score)
159
- for category_id, score in product_similarities
160
- if score >= confidence_threshold]
161
- top_categories = filtered_categories[:top_n]
162
 
163
  # Add category texts to the results
164
  results[product] = [(category_id, categories.get(category_id, "Unknown"), score)
@@ -228,6 +225,10 @@ def hybrid_category_matching(products: List[str], categories: Dict[str, str],
228
  query = f"Which category best describes the product: {expanded_descriptions[product]}"
229
  else:
230
  query = f"Which category best describes the product: {product}"
 
 
 
 
231
  reranking = client.rerank(
232
  query=query,
233
  documents=candidate_texts,
@@ -237,6 +238,7 @@ def hybrid_category_matching(products: List[str], categories: Dict[str, str],
237
 
238
  # Process re-ranking results
239
  product_categories = []
 
240
  for result in reranking.results:
241
  # Find the category ID for this result
242
  candidate_index = candidate_texts.index(result.document)
 
154
  progress_tracker(0.8, desc="Processing results")
155
 
156
  for product, product_similarities in similarities.items():
157
+ # Take top N without filtering by threshold
158
+ top_categories = product_similarities[:top_n]
 
 
 
159
 
160
  # Add category texts to the results
161
  results[product] = [(category_id, categories.get(category_id, "Unknown"), score)
 
225
  query = f"Which category best describes the product: {expanded_descriptions[product]}"
226
  else:
227
  query = f"Which category best describes the product: {product}"
228
+
229
+
230
+ print(f"Query: {query}")
231
+
232
  reranking = client.rerank(
233
  query=query,
234
  documents=candidate_texts,
 
238
 
239
  # Process re-ranking results
240
  product_categories = []
241
+ print(f"RERAANKING RESULTS: {reranking.results}")
242
  for result in reranking.results:
243
  # Find the category ID for this result
244
  candidate_index = candidate_texts.index(result.document)
ui_hybrid_matching.py CHANGED
@@ -98,6 +98,8 @@ def categorize_products_with_voyage_reranking(product_input, is_file=False, use_
98
  if not formatted_results:
99
  return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
100
 
 
 
101
  result_html = format_reranking_results_html(
102
  results=formatted_results,
103
  match_type=match_type,
 
98
  if not formatted_results:
99
  return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
100
 
101
+ print(f"FORMAT RESULTS: {formatted_results}")
102
+
103
  result_html = format_reranking_results_html(
104
  results=formatted_results,
105
  match_type=match_type,