|
|
|
|
|
from sentence_transformers import SentenceTransformer |
|
from sentence_transformers.models import Transformer, Pooling |
|
import numpy as np |
|
|
|
|
|
|
|
# Hugging Face id of the encoder whose embedding norms are inspected below.
model_name = 'BAAI/bge-large-en-v1.5'

# Assemble the pipeline by hand from a bare Transformer plus a Pooling layer.
# Only these two modules are passed to SentenceTransformer, so no
# normalisation step is part of the pipeline built here.
transformer = Transformer(model_name)
dim = transformer.get_word_embedding_dimension()

# NOTE(review): Pooling is constructed with its default pooling mode
# (presumably mean-over-tokens in sentence-transformers), whereas the
# published BGE configuration is believed to use CLS pooling -- confirm which
# one is intended before relying on the absolute norm values.
pooling = Pooling(dim)

model = SentenceTransformer(modules=[transformer, pooling])
|
|
|
|
|
def get_word_norm(word):
    """Return the L2 norm of the un-normalised embedding of *word*.

    Args:
        word: Text handed to ``model.encode``. The script in this file calls
            it with single words and short phrases.

    Returns:
        The Euclidean norm of the embedding, as computed by
        ``np.linalg.norm``.
    """
    # Normalisation is switched off on purpose: a unit-normalised vector
    # would have norm 1 for every input, leaving nothing to compare.
    embedding = model.encode(word, normalize_embeddings=False)
    return np.linalg.norm(embedding)
|
|
|
|
|
def category_score(norm, scale_factor=1.0):
    """Return ``scale_factor / norm``.

    The score is inversely proportional to the norm, so a smaller norm gives
    a larger score.

    Args:
        norm: Embedding norm used as the divisor. Must be non-zero; a plain
            Python ``0`` raises ``ZeroDivisionError``.
        scale_factor: Numerator of the ratio. Defaults to ``1.0``.

    Returns:
        The quotient ``scale_factor / norm``.
    """
    return scale_factor / norm
|
|
|
|
|
# Terms to compare.
words = ['animal', 'cat', 'mammal', 'siamese', 'thing', 'eiffel tower']

# Maps each word to {'norm': <embedding norm>, 'score': <category score>}.
results = {}
for word in words:
    norm = get_word_norm(word)
    score = category_score(norm)
    results[word] = {'norm': norm, 'score': score}

# Report with the highest category score (i.e. the smallest norm) first.
sorted_results = sorted(
    results.items(),
    key=lambda x: x[1]['score'],
    reverse=True,
)
for word, data in sorted_results:
    print(f"Word: {word}\tNorm: {data['norm']:.4f}\tCategory Score: {data['score']:.4f}")
|
|