Spaces:

bhutesh65
/

Jupiter-FAQ-streamlit

Running

Jupiter-FAQ-streamlit / semantic_search.py

Upload 12 files

001593c verified 18 days ago

1.48 kB

	# semantic_search.py

	import json
	import numpy as np
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity

	# Read the cleaned FAQ entries from disk (path is relative to the working directory).
	with open("cleaned_faqs.json", mode="r", encoding="utf-8") as f:
	    data = json.loads(f.read())

	# One question string per FAQ entry, kept in the same order as `data` so a
	# row index into `embeddings` is also a valid index into `data`.
	questions = [entry["question"] for entry in data]

	# Sentence-embedding model (lightweight and HuggingFace-friendly).
	model = SentenceTransformer("all-MiniLM-L6-v2")

	# Embed every FAQ question once, at import time, so each search only has to
	# encode the incoming query.
	embeddings = model.encode(questions)

	def search_faq(query, top_k=3):
	    """Return the FAQ entries whose questions are most similar to ``query``.

	    The query is embedded with the module-level ``model`` and compared, by
	    cosine similarity, against the precomputed ``embeddings`` of every FAQ
	    question.

	    Args:
	        query: Text to look up.
	        top_k: Maximum number of entries to return. Zero or a negative value
	            yields an empty list; ``None`` returns every entry, ranked.

	    Returns:
	        A list of up to ``top_k`` items taken from ``data``, ordered from the
	        highest to the lowest similarity score.
	    """
	    # Guard first: a negative top_k would otherwise slice as [:-k] and
	    # silently return "everything except the k worst matches". Doing it
	    # before encoding also avoids a pointless model call.
	    if top_k is not None and top_k <= 0:
	        return []

	    query_embedding = model.encode([query])
	    # One query row in, so the similarity matrix has a single row of scores.
	    scores = cosine_similarity(query_embedding, embeddings)[0]
	    # argsort is ascending; reverse it so the best matches come first.
	    top_indices = np.argsort(scores)[::-1][:top_k]

	    return [data[idx] for idx in top_indices]

	#----------------------------------------------------------------------------
	# from rephrase_with_mistral import rephrase_with_mistral
	# from semantic_search import search_faq

	# api_key = os.environ["API_KEY"] # <-- read the key from the environment (needs `import os`); never hard-code a real key in source

	# query = "how do I update my KYC?"
	# top_faq = search_faq(query)[0]

	# print("🔎 FAQ Retrieved:")
	# print(top_faq['question'])
	# print(top_faq['answer'])

	# # Now rephrase
	# print("\n💬 Rephrased Answer:")
	# #print(rephrase_with_mistral(top_faq['question'], top_faq['answer'], api_key))
	# print(rephrase_with_mistral(query, [top_faq], api_key)) # use a list of one FAQ