import torch
from transformers import AutoModel, AutoTokenizer

# Load model and tokenizer
model_name = "SURIYA-KP/small-sentence-embeddings-fine-tuned-depression-symptoms"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Function to get embeddings
def get_embedding(text):
    # Tokenize
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)

    # Get model output
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling
    token_embeddings = outputs.last_hidden_state
    attention_mask = inputs['attention_mask']
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    embedding = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

    return embedding.numpy()[0]

# Example usage
text1 = "I feel worthless and useless."
text2 = "I am feeling happy and content today."

emb1 = get_embedding(text1)
emb2 = get_embedding(text2)

# Calculate cosine similarity
cos_sim = torch.nn.functional.cosine_similarity(
    torch.tensor(emb1).unsqueeze(0),
    torch.tensor(emb2).unsqueeze(0)
).item()

print(f"Cosine similarity between texts: {cos_sim:.4f}")
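
# Optional follow-up (a sketch, not from the model card above): the tokenizer
# accepts a list of strings, so several texts can be embedded in one forward
# pass using the same masked mean-pooling logic. get_embeddings_batch and the
# example sentences below are hypothetical and only illustrate the pattern.
def get_embeddings_batch(texts):
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
    token_embeddings = outputs.last_hidden_state
    mask = inputs['attention_mask'].unsqueeze(-1).expand(token_embeddings.size()).float()
    # Same masked mean pooling as get_embedding, applied to the whole batch
    return torch.sum(token_embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)

# Rank candidate sentences against a query by cosine similarity
query_emb = get_embeddings_batch(["I can't sleep and I have no energy."])
candidate_embs = get_embeddings_batch([
    "I feel worthless and useless.",
    "I am feeling happy and content today.",
])
scores = torch.nn.functional.cosine_similarity(query_emb, candidate_embs)
print(scores)  # one similarity score per candidate sentence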