Spaces:
Running
Running
# semantic_search.py | |
import json | |
import numpy as np | |
from sentence_transformers import SentenceTransformer | |
from sklearn.metrics.pairwise import cosine_similarity | |
# Read the cleaned FAQ entries; each entry must carry a "question" field.
with open("cleaned_faqs.json", "r", encoding="utf-8") as faq_file:
    data = json.loads(faq_file.read())

# Only the question text is embedded; `data` keeps the full entries so a
# search hit can be mapped back to its complete record by index.
questions = [entry["question"] for entry in data]

# Lightweight and HuggingFace-friendly sentence-embedding model.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Embed every question once at import time so searches only encode the query.
embeddings = model.encode(questions)
def search_faq(query, top_k=3):
    """Return the FAQ entries whose questions are most similar to *query*.

    The query is embedded with the module-level ``model`` and compared to the
    precomputed question ``embeddings`` by cosine similarity.

    Args:
        query: Text to search for; it is passed to ``model.encode``.
        top_k: Maximum number of entries to return. ``None`` returns every
            entry, ranked by similarity.

    Returns:
        A list of up to ``top_k`` items from ``data``, ordered from highest
        to lowest similarity. Empty when ``top_k`` is zero or negative.
    """
    # A negative top_k would otherwise slice as ``[:-k]`` and silently return
    # all but the k worst matches; treat any non-positive request as "nothing"
    # and skip the encoding work entirely.
    if top_k is not None and top_k <= 0:
        return []

    query_embedding = model.encode([query])
    # One query row in, so the similarity matrix has a single row to take.
    scores = cosine_similarity(query_embedding, embeddings)[0]
    # argsort is ascending; reverse it so the best match comes first.
    top_indices = np.argsort(scores)[::-1][:top_k]
    return [data[idx] for idx in top_indices]
#---------------------------------------------------------------------------- | |
# from rephrase_with_mistral import rephrase_with_mistral | |
# from semantic_search import search_faq | |
# api_key = "<YOUR_API_KEY>"  # placeholder: load the real key from an environment variable and never commit it
# query = "how do I update my KYC?" | |
# top_faq = search_faq(query)[0] | |
# print("🔍 FAQ Retrieved:")
# print(top_faq['question']) | |
# print(top_faq['answer']) | |
# # Now rephrase | |
# print("\n💬 Rephrased Answer:")
# #print(rephrase_with_mistral(top_faq['question'], top_faq['answer'], api_key)) | |
# print(rephrase_with_mistral(query, [top_faq], api_key)) # use a list of one FAQ | |