import numpy as np


def _split_valid(embeddings_dict):
    """Return parallel lists (keys, embeddings) for entries whose embedding is not None."""
    keys = []
    embeddings = []
    for key, emb in embeddings_dict.items():
        if emb is not None:
            keys.append(key)
            embeddings.append(emb)
    return keys, embeddings


def _unit_rows(embeddings):
    """Convert a list of vectors to a float32 matrix with L2-normalized rows."""
    matrix = np.array(embeddings, dtype=np.float32)
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    # An all-zero vector has no direction; dividing by 0 would yield NaN and
    # poison the later sort. Dividing by 1 keeps the row at zero, so its
    # similarity to everything is 0.0.
    norms[norms == 0] = 1.0
    return matrix / norms


def compute_similarities(ingredients_dict, products_dict):
    """Compute cosine similarities between all products and ingredients using NumPy.

    Entries whose embedding is ``None`` are ignored in both dictionaries.

    Args:
        ingredients_dict: Mapping of ingredient name -> embedding vector or None.
        products_dict: Mapping of product key -> embedding vector or None.

    Returns:
        A dict mapping each product that has an embedding to a list of
        ``(ingredient_name, similarity)`` tuples sorted by similarity in
        descending order (ties keep the ingredient insertion order). The
        similarity is a Python ``float``. Returns ``{}`` when no product has
        an embedding; when no ingredient has an embedding every product maps
        to an empty list. A zero-length (all-zero) embedding yields a
        similarity of ``0.0`` rather than NaN.

    Raises:
        ValueError: Propagated from NumPy if the embeddings cannot be stacked
            into a rectangular float32 matrix or the product and ingredient
            dimensions do not match.
    """
    valid_products, product_vectors = _split_valid(products_dict)
    if not valid_products:
        return {}

    ingredient_names, ingredient_vectors = _split_valid(ingredients_dict)
    if not ingredient_names:
        # Nothing to compare against; an empty array cannot be normalized
        # along axis=1, so short-circuit instead of raising.
        return {product: [] for product in valid_products}

    normalized_products = _unit_rows(product_vectors)
    normalized_ingredients = _unit_rows(ingredient_vectors)

    # Dot product of unit vectors == cosine similarity; one matrix product
    # yields every (product, ingredient) pair at once.
    similarity_matrix = np.dot(normalized_products, normalized_ingredients.T)

    all_similarities = {}
    # tolist() converts each float32 entry to a plain Python float.
    for product, scores in zip(valid_products, similarity_matrix.tolist()):
        all_similarities[product] = sorted(
            zip(ingredient_names, scores),
            key=lambda pair: pair[1],
            reverse=True,
        )
    return all_similarities