product_ingredient_demo / similarity.py
esilver's picture
refactored
a318724
raw
history blame
2.09 kB
import numpy as np
def _unit_rows(rows):
    """Return *rows* as a float32 matrix with every row scaled to unit length."""
    matrix = np.array(rows, dtype=np.float32)
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    # An all-zero row has no direction; dividing by its zero norm would give
    # NaN. Dividing by 1 instead leaves the row at zero, so every cosine
    # similarity involving it comes out as 0.0.
    norms[norms == 0] = 1.0
    return matrix / norms


def compute_similarities(ingredients_dict, products_dict):
    """Rank every ingredient against every product by cosine similarity.

    Entries whose embedding is ``None`` are ignored on both sides.

    Args:
        ingredients_dict: Mapping of ingredient name -> embedding (or ``None``).
        products_dict: Mapping of product name -> embedding (or ``None``).
            NOTE(review): assumes all non-None embeddings are 1-D numeric
            sequences of one common length -- confirm against the caller.

    Returns:
        A dict mapping each product that has an embedding to a list of
        ``(ingredient_name, score)`` tuples sorted by score, highest first.
        Scores are Python floats. Returns ``{}`` when no product has an
        embedding; when no ingredient has an embedding, each product maps to
        an empty list. All-zero embeddings score 0.0 instead of NaN.
    """
    ingredients = [
        (name, emb) for name, emb in ingredients_dict.items() if emb is not None
    ]
    products = [
        (name, emb) for name, emb in products_dict.items() if emb is not None
    ]

    if not products:
        return {}
    if not ingredients:
        # Nothing to compare against: an empty list cannot be stacked into a
        # 2-D matrix, so short-circuit with empty rankings.
        return {name: [] for name, _ in products}

    ingredient_names = [name for name, _ in ingredients]
    normalized_ingredients = _unit_rows([emb for _, emb in ingredients])
    normalized_products = _unit_rows([emb for _, emb in products])

    # Dot product of unit vectors == cosine similarity; one matrix product
    # yields the full (products x ingredients) score table.
    similarity_matrix = np.dot(normalized_products, normalized_ingredients.T)

    all_similarities = {}
    for (product, _), scores in zip(products, similarity_matrix):
        ranked = list(zip(ingredient_names, scores.tolist()))
        # Stable sort: ingredients with equal scores keep insertion order.
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        all_similarities[product] = ranked
    return all_similarities