cindyangelira's picture
Update app.py
a9b9054 verified
raw
history blame
2.43 kB
import gradio as gr
from transformers import pipeline
from spacy import displacy
# load model pipeline globally
ner_pipe = pipeline("token-classification", model="cindyangelira/ner-roberta-large-bahasa-indonesia-finetuned", aggregation_strategy = "simple")
# define colors for each tag
def get_colors():
return {
"O": "#ffffff", # White for 'O'
"PER": "#ffadad", # Light red for 'PERSON'
"LOC": "#ffda83", # Light yellow for 'LOCATION'
"DATE_TIME": "#ffa500", # Light orange for 'DOB'
"EMAIL": "#85e0e0", # Light cyan for 'EMAIL'
"GENDER": "#c3c3e0", # Light gray for 'GENDER'
"SSN": "#800080", # Purple for 'ID'
"PHONE": "#d1ff85" # Light green for 'PHONE NUMBER'
}
def process_prediction(text, pred):
colors = get_colors()
combined_ents = [] # initialize an empty list to store combined entities
current_ent = None # var to track current entitiy
for token in pred:
token_label = token['entity_group'] #.replace('B-', '').replace('I-', '')
token_start = token['start']
token_end = token['end']
if current_ent is None or current_ent['label'] != token_label:
if current_ent:
combined_ents.append(current_ent)
current_ent = {
'start': token_start,
'end': token_end,
'label': token_label
}
else:
current_ent['end'] = token_end
if current_ent:
combined_ents.append(current_ent) # add the last entity after the loop finishes
doc = { # doc for viz
"text": text,
"ents": combined_ents,
"title": None
}
options = {"ents": list(colors.keys()), "colors": colors}
html = displacy.render(doc, style="ent", manual=True, options=options)
return html
def ner_visualization(text):
predictions = ner_pipe(text)
return process_prediction(text, predictions)
def build_interface():
iface = gr.Interface(
fn=ner_visualization,
inputs=gr.Textbox(label="Input Text"),
outputs="html",
title="NER Bahasa Indonesia",
description="Enter text to see named entity recognition results highlighted."
)
return iface
if __name__ == "__main__":
app = build_interface()
app.launch()