import html
import json
import os
import pickle


class SafeProgress:
    """Wrap an optional progress callback so callers never need a None check.

    When no callback was supplied, or the callback raises, the progress
    update is printed to stdout instead.
    """

    def __init__(self, progress_obj=None):
        """Store the callback (or None) that progress updates are sent to."""
        self.progress = progress_obj

    def __call__(self, value, desc=""):
        """Report a progress update.

        Args:
            value: Progress value forwarded to the callback as-is.
            desc: Description forwarded to the callback as ``desc=``.
        """
        if self.progress is not None:
            try:
                self.progress(value, desc=desc)
            except Exception:
                # Best-effort reporting: a broken callback must not abort the
                # caller's work.  ``Exception`` (not a bare ``except``) keeps
                # KeyboardInterrupt / SystemExit propagating.
                print(f"Progress {value}: {desc}")
        else:
            print(f"Progress {value}: {desc}")


def load_embeddings(embeddings_path):
    """Load ingredient embeddings from a pickle file.

    Args:
        embeddings_path: Path of the pickle file to read.

    Returns:
        Whatever object the pickle contains; it must support ``len()``
        because the loaded size is printed.
    """
    print(f"Loading ingredient embeddings from {embeddings_path}")
    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # Only call this on files from a trusted source.
    with open(embeddings_path, "rb") as f:
        ingredients_embeddings = pickle.load(f)
    print(f"Loaded {len(ingredients_embeddings)} ingredient embeddings")
    return ingredients_embeddings


def _extract_product_names(items):
    """Return the non-empty product names found in a decoded JSON list."""
    names = []
    for item in items:
        # Objects contribute their 'name' field; anything else is the name.
        candidate = item.get("name") if isinstance(item, dict) else item
        if candidate:
            names.append(str(candidate))
    return names


def parse_product_file(file_path):
    """Parse a file containing product data and extract product names.

    The file is first tried as JSON.  A JSON list may hold objects with a
    'name' field and/or plain values; empty names are skipped.  JSON whose
    top level is not a list yields an empty result.  If the content is not
    valid JSON it is read as plain text with one product per line, blank
    lines ignored.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list of product names.

    Raises:
        Exception: If the file cannot be read or processed; the original
            error is attached as ``__cause__``.
    """
    try:
        # utf-8-sig transparently drops a leading BOM, which would otherwise
        # make valid JSON fail to parse and fall through to the line reader.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            raw_text = f.read()

        try:
            products_data = json.loads(raw_text)
        except json.JSONDecodeError:
            # Not JSON: treat as a text file with one product per line.
            return [line.strip() for line in raw_text.split("\n") if line.strip()]

        if not isinstance(products_data, list):
            # Valid JSON but not a list (object, number, ...): nothing to extract.
            return []
        return _extract_product_names(products_data)
    except Exception as e:
        raise Exception(f"Error reading file: {str(e)}") from e


def _escape(value):
    """Return *value* as text that is safe to embed in HTML."""
    return html.escape(str(value))


def _confidence_percent(value):
    """Convert a 0-1 confidence into a truncated integer percentage.

    Values that cannot be converted (None, text, NaN, infinity) give 0 so a
    malformed score never breaks rendering.
    """
    try:
        return int(value * 100)
    except (TypeError, ValueError, OverflowError):
        return 0


def format_categories_html(product, similarities, chicory_result=None):
    """Format the results for one product as an HTML fragment.

    Args:
        product: Product name shown at the top.
        similarities: Iterable of ``(ingredient, score)`` pairs; when empty
            or None a "no similar ingredients" message is emitted instead.
        chicory_result: Optional parser result.  When it is a dict, its
            'ingredient' and 'confidence' entries are shown.

    Returns:
        The formatted string.  Interpolated values are HTML-escaped.
    """
    # NOTE(review): the literals below contain only text and newlines; if
    # markup was lost from them upstream, restore it from version control.
    parts = [f"\n\n{_escape(product)}\n\n"]

    # Chicory parser section.
    if chicory_result:
        parts.append("\n")
        parts.append("\n\nChicory Parser Results\n\n")
        if isinstance(chicory_result, dict):
            ingredient = chicory_result.get("ingredient", "Not found")
            confidence_pct = _confidence_percent(chicory_result.get("confidence", 0))
            parts.append("\n")
            parts.append(_escape(ingredient))
            parts.append(f"{confidence_pct}%")
            parts.append("\n")
        parts.append("\n")

    # Embedding similarity section.
    if similarities:
        parts.append("\n")
        parts.append("\n\nEmbedding Similarity\n\n")
        for ingredient, score in similarities:
            parts.append("\n")
            parts.append(_escape(ingredient))
            parts.append(f"{_confidence_percent(score)}%")
            parts.append("\n")
        parts.append("\n")
    else:
        parts.append(
            "\n\nNo similar ingredients found above the confidence threshold.\n\n"
        )

    parts.append("\n")
    return "".join(parts)


def get_confidence_color(score):
    """Return the hex color for a confidence score (higher is greener)."""
    if score >= 0.8:
        return "#1a8a38"  # Strong green
    if score >= 0.65:
        return "#4caf50"  # Medium green
    if score >= 0.5:
        return "#8bc34a"  # Light green
    return "#9e9e9e"  # Gray


def get_confidence_bg_color(score):
    """Return the hex background color for a confidence badge."""
    if score >= 0.8:
        return "#2e7d32"  # Dark green
    if score >= 0.65:
        return "#558b2f"  # Medium green
    if score >= 0.5:
        return "#9e9d24"  # Light green/yellow
    return "#757575"  # Gray


def get_confidence_text_color(score):
    """Return a hex text color that is readable on the badge background."""
    if score >= 0.5:
        return "#ffffff"  # White text on dark backgrounds
    return "#f5f5f5"  # Light gray on gray background