Spaces:
Sleeping
Sleeping
import gradio as gr | |
import torch | |
from peft import PeftModel | |
from transformers import AutoModelForSeq2SeqLM, pipeline, AutoTokenizer | |
from langchain_community.vectorstores import FAISS | |
from langchain_huggingface import HuggingFaceEmbeddings | |
# --- Model loading ----------------------------------------------------------
# Runs once at import time, before the UI is built.
print("Loading all models...")

# Embedding model handed to the FAISS store loader further down.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# mBART-50 base checkpoint and the LoRA adapter that is applied on top of it.
base_model_name = "facebook/mbart-large-50-many-to-many-mmt"
lora_adapter_id = "sourabsb/english_kumaoni-mbart_large"

tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    src_lang="en_XX",
    use_fast=False,
)
base_model = AutoModelForSeq2SeqLM.from_pretrained(base_model_name)

# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
finetuned_model = PeftModel.from_pretrained(base_model, lora_adapter_id).to(device)

# NOTE(review): the leading "β" looks like a mis-encoded status symbol --
# confirm the intended character before changing the message.
print("β Models loaded.")
# --- Retrieval store --------------------------------------------------------
print("Loading the saved RAG database...")

# NOTE(review): allow_dangerous_deserialization=True permits pickle-based
# loading of the saved index. Keep it only while "faiss_index" is a trusted,
# locally produced artifact; never point this at user-supplied files.
vector_store = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Return at most one document, and only when its relevance score reaches the
# 0.95 threshold; an empty result means "no stored translation is close enough".
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=dict(k=1, score_threshold=0.95),
)
print("β RAG database loaded.")
# Translation pipeline around the fine-tuned model; used as the fallback when
# the retriever returns no stored translation.
#
# The pipeline must run on the same device the model was moved to. A
# hard-coded `device=0` always targets the first CUDA GPU and therefore fails
# on CPU-only hosts, even though `device` above already falls back to the CPU.
#
# NOTE(review): tgt_lang="hi_IN" presumably reuses the Hindi language code for
# Kumaoni output because mBART-50 has no Kumaoni code -- confirm against the
# adapter's training setup.
translator_pipeline = pipeline(
    "translation",
    model=finetuned_model,
    tokenizer=tokenizer,
    src_lang="en_XX",
    tgt_lang="hi_IN",
    device=device,
)
def hybrid_translate(english_query):
    """Translate an English sentence to Kumaoni, preferring stored translations.

    The query is first looked up through the module-level ``retriever``. When
    a document is returned and its metadata carries a non-empty
    ``kumaoni_translation`` entry, that stored translation is used. In every
    other case the module-level ``translator_pipeline`` generates the
    translation (capped at ``max_length=50``).

    Args:
        english_query: English text entered by the user. ``None``, empty and
            whitespace-only values are treated as "nothing to translate".

    Returns:
        The Kumaoni translation as a string, or ``""`` when there is nothing
        to translate.
    """
    # Normalise first so blank input never reaches the retriever or the model.
    query = (english_query or "").strip()
    if not query:
        return ""

    # `invoke` is the supported retriever entry point; the older
    # `get_relevant_documents` is deprecated in LangChain.
    retrieved_docs = retriever.invoke(query)
    if retrieved_docs:
        # Fall through to the model instead of raising KeyError when a stored
        # document has no (or an empty) translation attached.
        stored = retrieved_docs[0].metadata.get("kumaoni_translation")
        if stored:
            return stored

    translation_result = translator_pipeline(query, max_length=50)
    return translation_result[0]["translation_text"]
# --- Gradio UI --------------------------------------------------------------
# NOTE(review): the nesting below assumes the button row sits inside the
# left-hand column, next to the input box -- confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Kumaoni Translator") as demo:
    gr.Markdown("# English to Kumaoni Translator")
    gr.Markdown("A hybrid AI model for English to Kumaoni Roman translation, built by Sourab.")

    with gr.Row():
        # Left column: English input with the action buttons underneath.
        with gr.Column():
            english_input = gr.Textbox(
                lines=5,
                placeholder="Type your English sentence here...",
                label="English Input",
            )
            with gr.Row():
                clear_button = gr.ClearButton()
                translate_button = gr.Button("Translate", variant="primary")

        # Right column: read-only box that shows the translation.
        with gr.Column():
            kumaoni_output = gr.Textbox(
                lines=5,
                label="Kumaoni Translation",
                interactive=False,
            )

    # Event wiring has to happen while the Blocks context is still open.
    translate_button.click(
        hybrid_translate,
        inputs=english_input,
        outputs=kumaoni_output,
    )
    # The clear button resets both text boxes.
    clear_button.add([english_input, kumaoni_output])

demo.launch()