Spaces:
Runtime error
Runtime error
import streamlit as st | |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline | |
# Load model and tokenizer
path_to_checkpoint = 'PranavaKailash/CyNER-2.0-DeBERTa-v3-base'


@st.cache_resource
def _load_ner_components(checkpoint):
    """
    Load the tokenizer, model and NER pipeline for ``checkpoint`` once.

    Streamlit re-executes this script on every widget interaction, so the
    loading is wrapped in ``st.cache_resource``: the objects are built on the
    first run and the same instances are handed back on later reruns.

    Args:
        checkpoint: Hugging Face Hub id (or local path) of the checkpoint.

    Returns:
        A ``(tokenizer, model, ner_pipeline)`` tuple.
    """
    # NOTE(review): `max_length` is forwarded to the tokenizer as-is; confirm
    # whether `model_max_length` was the intended setting.
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=True, max_length=768)
    loaded_model = AutoModelForTokenClassification.from_pretrained(checkpoint)
    loaded_pipeline = pipeline("ner", model=loaded_model, tokenizer=loaded_tokenizer)
    return loaded_tokenizer, loaded_model, loaded_pipeline


tokenizer, model, ner_pipeline = _load_ner_components(path_to_checkpoint)
def tag_sentence(sentence, entities_dict):
    """
    Wrap the recognised entities of ``sentence`` in visible HTML tag markers.

    Token-level predictions are sorted by start offset and merged into spans:
    an ``I-<label>`` prediction extends the preceding span when that span has
    the same label and only whitespace (or nothing) lies between them. Each
    span is then rendered as ``<B-label>text</B-label>``, with the markers
    shown in blue.

    Args:
        sentence: The original text the offsets refer to.
        entities_dict: Mapping of label -> list of prediction dicts. Each
            prediction must provide ``start``, ``end`` (character offsets
            into ``sentence``) and ``entity`` (the predicted label).

    Returns:
        An HTML string. All text taken from ``sentence`` and all labels are
        HTML-escaped, so the result can be rendered with raw HTML enabled.
    """
    import html  # local import: keeps this function self-contained

    predictions = sorted(
        (
            (item['start'], item['end'], item['entity'])
            for group in entities_dict.values()
            for item in group
        ),
        key=lambda prediction: prediction[0],
    )

    # Each span is a mutable [start, end, display_tag] triple.
    spans = []
    for start, end, entity_type in predictions:
        is_continuation = entity_type.startswith('I-')
        # Display every span under its B- label; only the *prefix* is swapped
        # so labels that merely contain "I-" elsewhere are left intact.
        display_tag = 'B-' + entity_type[2:] if is_continuation else entity_type
        if spans and is_continuation:
            previous = spans[-1]
            gap = sentence[previous[1]:start]
            if previous[2] == display_tag and not gap.strip():
                # Same entity continues (sub-word piece or next word).
                previous[1] = max(previous[1], end)
                continue
        spans.append([start, end, display_tag])

    pieces = []
    last_idx = 0
    for start, end, display_tag in spans:
        # Never step backwards: overlapping spans must not duplicate text.
        start = max(start, last_idx)
        if end <= start:
            continue
        safe_tag = html.escape(display_tag, quote=False)
        pieces.append(html.escape(sentence[last_idx:start], quote=False))
        # &lt; / &gt; keep the markers visible instead of being parsed as
        # unknown HTML elements by the browser.
        pieces.append(
            f"<span style='color:blue'>&lt;{safe_tag}&gt;</span>"
            f"{html.escape(sentence[start:end], quote=False)}"
            f"<span style='color:blue'>&lt;/{safe_tag}&gt;</span>"
        )
        last_idx = end
    pieces.append(html.escape(sentence[last_idx:], quote=False))
    return "".join(pieces)
def perform_ner(text):
    """
    Run the NER pipeline on ``text`` and prepare the results for display.

    Args:
        text: Raw input string to analyse.

    Returns:
        A ``(entities_dict, tagged_sentence)`` tuple. ``entities_dict`` maps
        each predicted label to the list of predictions carrying that label
        (keys ``entity``, ``score``, ``index``, ``word``, ``start``, ``end``).
        ``tagged_sentence`` is the markup produced by ``tag_sentence`` for
        the same predictions.
    """
    entities_dict = {}
    for entity in ner_pipeline(text):
        entities_dict.setdefault(entity['entity'], []).append({
            "entity": entity['entity'],
            # The pipeline typically reports scores as numpy.float32; cast to
            # a built-in float so the dict is plain-JSON serialisable.
            "score": float(entity['score']),
            "index": entity['index'],
            "word": entity['word'],
            "start": entity['start'],
            "end": entity['end'],
        })
    tagged_sentence = tag_sentence(text, entities_dict)
    return entities_dict, tagged_sentence
# Streamlit UI
st.title("CyNER 2.0 - Named Entity Recognition")
st.write("Enter text to get named entity recognition results.")
# Show the hint as a placeholder rather than as the initial value, so an
# untouched box is empty and triggers the warning below instead of being
# analysed as if it were user input.
input_text = st.text_area("Input Text", placeholder="Type your text here...")
if st.button("Analyze"):
    if input_text.strip():
        entities_dict, tagged_sentence = perform_ner(input_text)
        # Display results
        st.subheader("Tagged Entities")
        # tagged_sentence is HTML markup, so raw HTML rendering is required.
        st.markdown(tagged_sentence, unsafe_allow_html=True)
        st.subheader("Entities and Details")
        st.json(entities_dict)
    else:
        st.warning("Please enter some text for analysis.")