import gradio as gr
from transformers import pipeline
from spacy import displacy

# load model pipeline globally 
ner_pipe = pipeline("token-classification", model="cindyangelira/ner-roberta-large-bahasa-indonesia-finetuned", aggregation_strategy = "simple")

# define colors for each tag
def get_colors():
    return {
        "O": "#ffffff",            # White for 'O'
        "PER": "#ffadad",      # Light red for 'PERSON'
        "LOC": "#ffda83",    # Light yellow for 'LOCATION'
        "DATE_TIME": "#ffa500",         # Light orange for 'DOB'
        "EMAIL": "#85e0e0",       # Light cyan for 'EMAIL'
        "GENDER": "#c3c3e0",      # Light gray for 'GENDER'
        "SSN": "#800080",          # Purple for 'ID'
        "PHONE": "#d1ff85" # Light green for 'PHONE NUMBER'
    }


def process_prediction(text, pred):
    colors = get_colors()
    combined_ents = []     # initialize an empty list to store combined entities

    current_ent = None # var to track current entitiy

    for token in pred:
        token_label = token['entity_group'] #.replace('B-', '').replace('I-', '')
        token_start = token['start']
        token_end = token['end']

        if current_ent is None or current_ent['label'] != token_label:
            if current_ent:
                combined_ents.append(current_ent) 
            current_ent = {
                'start': token_start,
                'end': token_end,
                'label': token_label
            }
        else:
            current_ent['end'] = token_end

    if current_ent: 
        combined_ents.append(current_ent) # add the last entity after the loop finishes


    doc = {  # doc for viz
        "text": text,
        "ents": combined_ents,
        "title": None
    }

    options = {"ents": list(colors.keys()), "colors": colors}
    html = displacy.render(doc, style="ent", manual=True, options=options)
    return html

def ner_visualization(text):
    predictions = ner_pipe(text)  
    return process_prediction(text, predictions)

def build_interface():
    iface = gr.Interface(
        fn=ner_visualization,                 
        inputs=gr.Textbox(label="Input Text"),
        outputs="html",                      
        title="NER Bahasa Indonesia",           
        description="Enter text to see named entity recognition results highlighted."
    )
    return iface


if __name__ == "__main__":
    app = build_interface()
    app.launch()