# Hugging Face Space page metadata (non-code, preserved as a comment so the
# file remains valid Python):
#   ajaynagotha's picture | Update app.py | 86e6f42 verified
#   raw | history blame | 1.96 kB
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer, util
import torch
from datasets import load_dataset
# Load the seq2seq model and tokenizer used for answer generation.
# NOTE: flan-t5-xl is a ~3B-parameter model — this download/load happens at
# import time and is slow and memory-hungry.
model_name = "google/flan-t5-xl"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Load the Gita dataset from the Hugging Face Hub.
ds = load_dataset("knowrohit07/gita_dataset")
# Column-wise views of the train split; these are parallel lists that are
# indexed together by position in find_relevant_texts below.
chapters = ds['train']['Chapter']
sentence_ranges = ds['train']['sentence_range']
texts = ds['train']['Text']
# Load a sentence transformer model for semantic (embedding-based) search.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
# Encode all passages once up front so each query only needs to embed the
# question and compute cosine similarities against this cached tensor.
text_embeddings = sentence_model.encode(texts, convert_to_tensor=True)
def find_relevant_texts(query, top_k=3):
    """Return the ``top_k`` dataset passages most similar to *query*.

    Embeds the query with the sentence-transformer model, scores it against
    the precomputed passage embeddings by cosine similarity, and formats each
    hit as ``"Chapter C, Verses R: text"``.

    Args:
        query: Free-text question to search for.
        top_k: Maximum number of passages to return; clamped to the corpus
            size so ``torch.topk`` cannot raise on a small dataset.

    Returns:
        A single string with the matched passages separated by blank lines.
    """
    query_embedding = sentence_model.encode(query, convert_to_tensor=True)
    cos_scores = util.cos_sim(query_embedding, text_embeddings)[0]
    # Bug fix: torch.topk raises a RuntimeError if k exceeds the number of
    # available scores, so clamp k to the corpus size.
    k = min(top_k, cos_scores.numel())
    top_results = torch.topk(cos_scores, k=k)
    relevant_texts = []
    for idx in top_results.indices:
        # Convert the 0-dim index tensor to a plain int before list indexing.
        i = idx.item()
        relevant_texts.append(
            f"Chapter {chapters[i]}, Verses {sentence_ranges[i]}: {texts[i]}"
        )
    return "\n\n".join(relevant_texts)
def generate_response(question):
    """Answer *question* using retrieved Gita excerpts as grounding context.

    Retrieves the most relevant passages, builds a prompt that embeds them,
    and samples an answer from the seq2seq model.

    Args:
        question: The user's free-text question.

    Returns:
        The model-generated answer as a plain string.
    """
    relevant_texts = find_relevant_texts(question)
    prompt = f"""Based on the following excerpts from the Bhagavad Gita, answer the question.
Relevant excerpts:
{relevant_texts}
Question: {question}
Answer:"""
    # Bug fix: truncate to the encoder's maximum length. Without this, a
    # prompt containing several retrieved passages plus the question can
    # exceed T5's 512-token window and degrade output or error out.
    input_ids = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=512
    ).input_ids
    # Sampling (temperature / nucleus top-p) yields varied, non-deterministic
    # answers rather than greedy decoding.
    outputs = model.generate(
        input_ids, max_new_tokens=200, do_sample=True, temperature=0.7, top_p=0.95
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
# Minimal Gradio front-end: a two-line question box feeding the generator,
# with the model's answer rendered as plain text.
question_box = gr.Textbox(
    lines=2,
    placeholder="Enter your question about the Bhagavad Gita here...",
)
iface = gr.Interface(fn=generate_response, inputs=question_box, outputs="text")
iface.launch()