Spaces:

Kantkamal
/

Gujarati-BERT-NER

Running

App Files Files Community

Kantkamal committed 8 days ago

Commit

38166d9

verified ·

1 Parent(s): 6c04efb

Upload 2 files

Browse files

Files changed (2) hide show

app.py +44 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForTokenClassification
+tokenizer = AutoTokenizer.from_pretrained("Kantkamal/Gujarati-BERT-NER")
+model = AutoModelForTokenClassification.from_pretrained("Kantkamal/Gujarati-BERT-NER")
+def get_ner(sentence):
+    tok_sentence = tokenizer(sentence, return_tensors='pt')
+    with torch.no_grad():
+        logits = model(**tok_sentence).logits.argmax(-1)
+        predicted_tokens_classes = [
+            model.config.id2label[t.item()] for t in logits[0]]
+        predicted_labels = []
+        previous_token_id = 0
+        word_ids = tok_sentence.word_ids()
+        for word_index in range(len(word_ids)):
+            if word_ids[word_index] == None:
+                previous_token_id = word_ids[word_index]
+            elif word_ids[word_index] == previous_token_id:
+                previous_token_id = word_ids[word_index]
+            else:
+                predicted_labels.append(predicted_tokens_classes[word_index])
+                previous_token_id = word_ids[word_index]
+        ner_output = []
+        for index in range(len(sentence.split(' '))):
+            ner_output.append(
+                (sentence.split(' ')[index], predicted_labels[index]))
+        return ner_output
+iface = gr.Interface(get_ner,
+                     gr.Textbox(placeholder="Enter sentence here..."),
+                     ["highlight"], description='The language covered by Gujarati-BERT-NER is: Gujarati .',
+                     example=['નડિયાદમાં જન્‍મેલા સરદાર વલ્લભભાઈ પટેલ ભારતીય બંધારણસભાના સભ્ય હતા.'], title='Gujarati-BERT-NER',
+                     article='Gujarati-BERT-NER is a fine-tuned Named Entity Recognition (NER) model for the Gujarati language based on the GujaratiBERT model. It has been trained on the Naamapadam dataset.')
+iface.launch(enable_queue=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+transformers
+torch
+gradio
+sentencepiece==0.1.95
+datasets
+seqeval
+safetensors