Kantkamal committed on
Commit
38166d9
·
verified ·
1 Parent(s): 6c04efb

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +44 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
4
+
5
# Single identifier shared by the tokenizer and the model so the two can
# never drift apart.
MODEL_ID = "Kantkamal/Gujarati-BERT-NER"

# Both objects are created once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
8
+
9
+
10
def get_ner(sentence):
    """Tag each whitespace-separated word of ``sentence`` with an NER label.

    The sentence is split on whitespace and handed to the tokenizer as
    pre-split words, so the word indices reported by ``word_ids()`` line up
    one-to-one with the words returned here. Each word takes the label
    predicted for its first sub-token.

    Args:
        sentence: Raw input text.

    Returns:
        A list of ``(word, label)`` tuples in sentence order. ``label`` comes
        from ``model.config.id2label``; it is ``None`` for a word that ended
        up with no token in the encoding (for example, cut off by
        truncation). An empty or whitespace-only sentence yields ``[]``.
    """
    # split() without an argument collapses runs of whitespace, so repeated
    # spaces never produce empty "words" that would have no label.
    words = sentence.split() if sentence else []
    if not words:
        return []

    # Passing the words pre-split keeps the tokenizer's word indices aligned
    # with `words`; letting it split the raw string can separate punctuation
    # into extra words and shift every following label.
    encoding = tokenizer(
        words,
        is_split_into_words=True,
        truncation=True,
        return_tensors='pt',
    )

    with torch.no_grad():
        predicted_ids = model(**encoding).logits.argmax(-1)[0]

    # Keep only the prediction of the first sub-token of every word; special
    # tokens report a word id of None and are skipped.
    first_token_label = {}
    for token_index, word_id in enumerate(encoding.word_ids()):
        if word_id is None or word_id in first_token_label:
            continue
        class_id = predicted_ids[token_index].item()
        first_token_label[word_id] = model.config.id2label[class_id]

    return [
        (word, first_token_label.get(word_index))
        for word_index, word in enumerate(words)
    ]
36
+
37
+
38
# Web UI: a single textbox in, highlighted (word, label) spans out.
iface = gr.Interface(
    fn=get_ner,
    inputs=gr.Textbox(placeholder="Enter sentence here..."),
    # Explicit component instead of the "highlight" string shortcut, whose
    # spelling differs between Gradio releases.
    outputs=gr.HighlightedText(),
    title='Gujarati-BERT-NER',
    description='The language covered by Gujarati-BERT-NER is: Gujarati .',
    article='Gujarati-BERT-NER is a fine-tuned Named Entity Recognition (NER) model for the Gujarati language based on the GujaratiBERT model. It has been trained on the Naamapadam dataset.',
    # The keyword is `examples` (plural); the misspelled `example=` never
    # reached the examples panel.
    examples=['નડિયાદમાં જન્‍મેલા સરદાર વલ્લભભાઈ પટેલ ભારતીય બંધારણસભાના સભ્ય હતા.'],
)

# queue() is the supported replacement for launch(enable_queue=True), which
# newer Gradio releases no longer accept.
iface.queue()
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ gradio
4
+ sentencepiece==0.1.95
5
+ datasets
6
+ seqeval
7
+ safetensors