# Spaces: Running
import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
# Hugging Face access token taken from the environment (None when HF_TOKEN is
# unset). NOTE(review): it is not passed to the loaders below in the visible
# code -- confirm whether it is still needed.
token = os.environ.get("HF_TOKEN")

# Hub identifier shared by the tokenizer and the token-classification model.
MODEL_ID = "Kantkamal/Gujarati-BERT-NER"

# Both objects are loaded once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
def get_ner(sentence):
    """Tag every whitespace-separated word of *sentence* with an NER label.

    The sentence is split into words first and the tokenizer is told that the
    input is already split (``is_split_into_words=True``), so the word ids it
    reports index directly into that word list. This keeps words and labels
    aligned even if the tokenizer would otherwise segment the raw string
    differently (presumably at punctuation -- the previous version paired
    tokenizer-level words with ``sentence.split(' ')`` and could shift labels
    or raise ``IndexError``).

    Args:
        sentence: Raw input text. ``None`` or blank text yields an empty list.

    Returns:
        A list of ``(word, label)`` tuples, one per whitespace-separated word,
        in input order. ``label`` is the class predicted for the word's first
        sub-token, or ``None`` when the encoding contains no sub-token for
        that word (e.g. it was cut off by truncation).
    """
    # str.split() with no argument collapses runs of whitespace, so repeated
    # spaces never produce empty "words" that have no prediction.
    words = (sentence or "").split()
    if not words:
        return []

    encoding = tokenizer(
        words,
        is_split_into_words=True,
        return_tensors='pt',
        # Guard against inputs longer than the tokenizer's configured maximum;
        # NOTE(review): has no effect if the tokenizer defines no max length.
        truncation=True,
    )

    with torch.no_grad():
        # Highest-scoring class id for each sub-token of the single sequence.
        class_ids = model(**encoding).logits.argmax(-1)[0].tolist()

    # Keep the label of the first sub-token of each word; special tokens are
    # reported with a word id of None and are skipped.
    first_label = {}
    for position, word_id in enumerate(encoding.word_ids()):
        if word_id is not None and word_id not in first_label:
            first_label[word_id] = model.config.id2label[class_ids[position]]

    return [(word, first_label.get(index)) for index, word in enumerate(words)]
# Gradio demo: one text box in, get_ner's (word, label) pairs rendered through
# the "highlight" output component.
iface = gr.Interface(
    fn=get_ner,
    inputs=gr.Textbox(placeholder="Enter sentence here..."),
    outputs=["highlight"],
    title='Gujarati-BERT-NER',
    description='The language covered by Gujarati-BERT-NER is: Gujarati .',
    examples=['નડિયાદમાં જન્મેલા સરદાર વલ્લભભાઈ પટેલ ભારતીય બંધારણસભાના સભ્ય હતા.'],
    article='Gujarati-BERT-NER is a fine-tuned Named Entity Recognition (NER) model for the Gujarati language based on the GujaratiBERT model. It has been trained on the Naamapadam dataset.',
)

# Start serving the interface.
iface.launch()