Spaces:
Sleeping
Sleeping
import numpy as np | |
def _collect_embeddings(embeddings_by_name):
    """Split a name -> embedding mapping into parallel lists, skipping None embeddings."""
    names = []
    vectors = []
    for name, vector in embeddings_by_name.items():
        if vector is not None:
            names.append(name)
            vectors.append(vector)
    return names, vectors


def _unit_rows(vectors):
    """Return *vectors* as a float32 matrix whose rows are scaled to unit L2 norm."""
    matrix = np.array(vectors, dtype=np.float32)
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    # A zero-magnitude row has no direction; dividing by 0 would yield NaN and
    # make the later sort unpredictable. Dividing by 1 instead leaves the row
    # all-zero, so its similarity to everything is 0.0.
    norms[norms == 0.0] = 1.0
    return matrix / norms


def compute_similarities(ingredients_dict, products_dict):
    """Rank all ingredients by cosine similarity for every product.

    Args:
        ingredients_dict: Mapping of ingredient name to its embedding (a
            sequence of numbers), or to None when no embedding is available.
        products_dict: Mapping of product name to its embedding, or to None
            when no embedding is available.

    Returns:
        A dict mapping each product that has a non-None embedding to a list of
        ``(ingredient_name, similarity)`` tuples, sorted by similarity in
        descending order. Entries whose embedding is None are skipped on both
        sides. Returns an empty dict when no product has an embedding; when no
        ingredient has an embedding, every product maps to an empty list.
        Zero-magnitude embeddings get a similarity of 0.0.
    """
    ingredient_names, ingredient_vectors = _collect_embeddings(ingredients_dict)
    valid_products, product_vectors = _collect_embeddings(products_dict)

    if not valid_products:
        return {}
    if not ingredient_names:
        # Nothing to compare against: an empty ingredient matrix would be 1-D
        # and break the axis=1 norm, so short-circuit with empty rankings.
        return {product: [] for product in valid_products}

    normalized_ingredients = _unit_rows(ingredient_vectors)
    normalized_products = _unit_rows(product_vectors)

    # Dot product of unit vectors is the cosine similarity; one matrix product
    # yields the full (products x ingredients) table.
    similarity_matrix = np.dot(normalized_products, normalized_ingredients.T)

    all_similarities = {}
    for product, row in zip(valid_products, similarity_matrix):
        # tolist() converts the float32 scores to plain Python floats.
        scored = list(zip(ingredient_names, row.tolist()))
        # Stable sort: ingredients with equal scores keep their input order.
        scored.sort(key=lambda pair: pair[1], reverse=True)
        all_similarities[product] = scored
    return all_similarities