import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification 

import os
# Hugging Face access token taken from the environment (None when unset).
# NOTE(review): it is only read here; nothing in this block passes it to
# the loaders below — confirm whether the checkpoint needs authentication.
token = os.environ.get("HF_TOKEN")

# Hub repository id shared by the tokenizer and the model.
_MODEL_ID = "Kantkamal/Gujarati-BERT-NER"

# Loaded once at import time so every request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(_MODEL_ID)


def get_ner(sentence):
    """Label every whitespace-separated word of *sentence* with an NER tag.

    The sentence is split on whitespace and the resulting words are handed
    to the tokenizer as pre-split input, so the word indices reported by
    ``word_ids()`` refer to exactly the words returned here. Each word
    takes the label predicted for its first sub-token.

    Args:
        sentence: Raw input text. ``None`` or blank text is accepted.

    Returns:
        A list of ``(word, label)`` tuples in input order. ``label`` is a
        value of ``model.config.id2label``, or ``None`` for a word that has
        no sub-token in the encoded input (for example because it was cut
        off by truncation). An empty list is returned for blank input.
    """
    # split() without an argument collapses runs of whitespace, so no empty
    # "words" are produced for double spaces or leading/trailing blanks.
    words = (sentence or "").split()
    if not words:
        return []

    # is_split_into_words=True keeps word_ids() aligned with `words` even
    # when the tokenizer would otherwise treat punctuation as its own word.
    # truncation=True keeps the input within the tokenizer's length limit.
    encoded = tokenizer(
        words,
        is_split_into_words=True,
        truncation=True,
        return_tensors='pt',
    )

    with torch.no_grad():
        # One predicted class id per sub-token of the single input sequence.
        class_ids = model(**encoded).logits.argmax(-1)[0].tolist()

    id2label = model.config.id2label
    first_token_label = {}
    for position, word_id in enumerate(encoded.word_ids()):
        # word_id is None for special tokens; later sub-tokens of a word are
        # ignored so the first sub-token decides the word's label.
        if word_id is not None and word_id not in first_token_label:
            first_token_label[word_id] = id2label[class_ids[position]]

    return [
        (word, first_token_label.get(index))
        for index, word in enumerate(words)
    ]


# Static texts shown on the demo page.
_TITLE = 'Gujarati-BERT-NER'
_DESCRIPTION = 'The language covered by Gujarati-BERT-NER is: Gujarati .'
_ARTICLE = 'Gujarati-BERT-NER is a fine-tuned Named Entity Recognition (NER) model for the Gujarati language based on the GujaratiBERT model. It has been trained on the Naamapadam dataset.'
_EXAMPLES = ['નડિયાદમાં જન્‍મેલા સરદાર વલ્લભભાઈ પટેલ ભારતીય બંધારણસભાના સભ્ય હતા.']

# Web UI: a single text box feeds get_ner and its (word, label) pairs are
# rendered through the "highlight" output.
iface = gr.Interface(
    fn=get_ner,
    inputs=gr.Textbox(placeholder="Enter sentence here..."),
    outputs=["highlight"],
    description=_DESCRIPTION,
    examples=_EXAMPLES,
    title=_TITLE,
    article=_ARTICLE,
)

# Start serving the interface.
iface.launch()