Spaces:

Kantkamal
/

Gujarati-BERT-NER

Running

App Files Files Community

Gujarati-BERT-NER / app.py

Kantkamal

Update app.py

1b2f3bc verified 8 days ago

raw

history blame contribute delete

2 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForTokenClassification

	import os
	# Optional Hugging Face access token read from the HF_TOKEN environment
	# variable; os.getenv returns None when the variable is not set.
	token = os.getenv("HF_TOKEN")

	# Hub repository that provides both the tokenizer and the NER model.
	MODEL_ID = "Kantkamal/Gujarati-BERT-NER"

	# Forward the token to both loaders so the credential read above is
	# actually used (needed if the repository is private or gated). With
	# HF_TOKEN unset this passes token=None, which is the loaders' default.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=token)

	model = AutoModelForTokenClassification.from_pretrained(MODEL_ID, token=token)


	def get_ner(sentence):
	    """Tag every whitespace-separated word of *sentence* with an NER label.

	    The sentence is split on whitespace first and handed to the tokenizer
	    as pre-split words, so the word ids reported by the tokenizer index
	    directly into the list of words that is returned. Each word takes the
	    label predicted for its first sub-token.

	    Args:
	        sentence: Raw input text.

	    Returns:
	        A list of ``(word, label)`` tuples in input order. ``label`` is the
	        ``model.config.id2label`` entry of the word's first sub-token, or
	        ``None`` when no token of that word reached the model (for example
	        because the input was truncated). Empty or whitespace-only input
	        yields an empty list.
	    """
	    # split() without an argument drops empty strings, so leading, trailing
	    # or repeated spaces cannot produce words that have no prediction.
	    words = sentence.split()
	    if not words:
	        return []

	    # is_split_into_words=True keeps one word id per entry of `words`, even
	    # when the tokenizer breaks a word further (e.g. trailing punctuation).
	    # truncation=True keeps over-long input within the tokenizer's limit.
	    encoding = tokenizer(
	        words,
	        is_split_into_words=True,
	        truncation=True,
	        return_tensors='pt',
	    )

	    # Only the forward pass needs gradient tracking switched off.
	    with torch.no_grad():
	        class_ids = model(**encoding).logits.argmax(-1)[0].tolist()

	    id2label = model.config.id2label

	    # Remember the label of the first sub-token of each word; special
	    # tokens carry a word id of None and are skipped.
	    first_label = {}
	    for word_id, class_id in zip(encoding.word_ids(), class_ids):
	        if word_id is not None and word_id not in first_label:
	            first_label[word_id] = id2label[class_id]

	    return [(word, first_label.get(position))
	            for position, word in enumerate(words)]


	# Web UI: a single text box feeds get_ner, whose (word, label) tuples are
	# shown through the "highlight" output component.
	iface = gr.Interface(
	    fn=get_ner,
	    inputs=gr.Textbox(placeholder="Enter sentence here..."),
	    outputs=["highlight"],
	    title='Gujarati-BERT-NER',
	    description='The language covered by Gujarati-BERT-NER is: Gujarati .',
	    examples=['નડિયાદમાં જન્‍મેલા સરદાર વલ્લભભાઈ પટેલ ભારતીય બંધારણસભાના સભ્ય હતા.'],
	    article='Gujarati-BERT-NER is a fine-tuned Named Entity Recognition (NER) model for the Gujarati language based on the GujaratiBERT model. It has been trained on the Naamapadam dataset.',
	)

	# Start serving the interface.
	iface.launch()