import torch
from transformers import AutoModel, AutoTokenizer

# Load model and tokenizer
model_name = "SURIYA-KP/small-sentence-embeddings-fine-tuned-depression-symptoms"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Function to get a single mean-pooled sentence embedding
def get_embedding(text):
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    # Run the model without tracking gradients
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean pooling: average the token embeddings, weighted by the attention
    # mask so that padding tokens do not contribute
    token_embeddings = outputs.last_hidden_state
    attention_mask = inputs['attention_mask']
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    embedding = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    return embedding.numpy()[0]
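
# Illustrative extension (not part of the original script): the same
# mean-pooling logic works on a batch of texts in a single forward pass,
# which avoids one model call per sentence. `get_embeddings_batch` is a
# hypothetical helper name introduced here for this sketch.
def get_embeddings_batch(texts):
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
    token_embeddings = outputs.last_hidden_state
    mask = inputs['attention_mask'].unsqueeze(-1).expand(token_embeddings.size()).float()
    embeddings = torch.sum(token_embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)
    return embeddings.numpy()  # shape: (len(texts), hidden_size)
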
# Example usage
text1 = "I feel worthless and useless."
text2 = "I am feeling happy and content today."
emb1 = get_embedding(text1)
emb2 = get_embedding(text2)
# Calculate cosine similarity
cos_sim = torch.nn.functional.cosine_similarity(
    torch.tensor(emb1).unsqueeze(0),
    torch.tensor(emb2).unsqueeze(0)
).item()
print(f"Cosine similarity between texts: {cos_sim:.4f}")