Spaces:

PranavaKailash
/

CyNER2.0_Cyber_Entity_Recogonizer

Runtime error

App Files Files Community

Pranava Kailash committed on Oct 30, 2024

Commit

6c7907e

1 Parent(s): 30b1605

Deploy CyNER 2.0 NER App to Hugging Face V1.0

Browse files

Files changed (2) hide show

app.py +87 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+# Load model and tokenizer
+path_to_checkpoint = 'PranavaKailash/CyNER-2.0-DeBERTa-v3-base'
+def _load_ner_components(checkpoint):
+    """
+    Build the tokenizer, the token-classification model and the NER pipeline.
+
+    Args:
+        checkpoint: Hugging Face Hub id (or local path) of the checkpoint.
+
+    Returns:
+        A ``(tokenizer, model, ner_pipeline)`` tuple.
+    """
+    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=True, max_length=768)
+    loaded_model = AutoModelForTokenClassification.from_pretrained(checkpoint)
+    return loaded_tokenizer, loaded_model, pipeline("ner", model=loaded_model, tokenizer=loaded_tokenizer)
+# Streamlit re-executes this script on every widget interaction. Caching the
+# loader keeps the weights in memory instead of reloading them on each rerun.
+# Older Streamlit releases without ``cache_resource`` fall back to a plain call.
+if hasattr(st, "cache_resource"):
+    _load_ner_components = st.cache_resource(_load_ner_components)
+tokenizer, model, ner_pipeline = _load_ner_components(path_to_checkpoint)
+def tag_sentence(sentence, entities_dict):
+    """
+    Add visible, colour-coded entity tags to a sentence for HTML display.
+
+    Args:
+        sentence: The raw text that was analysed.
+        entities_dict: Mapping of label -> list of token dicts. Each token
+            dict must provide 'entity' (the label) and 'start'/'end'
+            (character offsets into ``sentence``).
+
+    Returns:
+        An HTML string in which every entity span is wrapped as
+        ``<B-Type>text</B-Type>`` with the tag markers shown literally in
+        blue. All text taken from ``sentence`` is HTML-escaped, so the result
+        can be rendered with ``unsafe_allow_html=True`` without letting the
+        input inject markup.
+    """
+    import html  # local import: only this function needs it
+    tokens = sorted(
+        (tok['start'], tok['end'], tok['entity'])
+        for toks in entities_dict.values()
+        for tok in toks
+    )
+    # Each span is [start, end, display_tag, base_type].
+    spans = []
+    for start, end, label in tokens:
+        # Split only a leading BIO prefix; an 'I-' inside the type name is kept.
+        prefix = label[:2] if label[:2] in ('B-', 'I-') else ''
+        base = label[len(prefix):]
+        if spans:
+            prev = spans[-1]
+            # An I- token continues the previous span when the type matches and
+            # nothing but whitespace separates the two pieces. This covers
+            # B-X followed by I-X as well as space-separated words.
+            if prefix == 'I-' and base == prev[3] and not sentence[prev[1]:start].strip():
+                prev[1] = max(prev[1], end)
+                continue
+            # Drop a token that overlaps the previous span; emitting it would
+            # duplicate part of the sentence in the output.
+            if start < prev[1]:
+                continue
+        spans.append([start, end, ('B-' + base) if prefix else label, base])
+    parts = []
+    cursor = 0
+    for start, end, tag, _ in spans:
+        shown_tag = html.escape(tag, quote=False)
+        parts.append(html.escape(sentence[cursor:start], quote=False))
+        # &lt; / &gt; make the browser display the tag text instead of parsing
+        # it as an (unknown) HTML element.
+        parts.append(
+            f"<span style='color:blue'>&lt;{shown_tag}&gt;</span>"
+            f"{html.escape(sentence[start:end], quote=False)}"
+            f"<span style='color:blue'>&lt;/{shown_tag}&gt;</span>"
+        )
+        cursor = end
+    parts.append(html.escape(sentence[cursor:], quote=False))
+    return "".join(parts)
+def perform_ner(text):
+    """
+    Run the NER pipeline on ``text`` and prepare the results for display.
+
+    Args:
+        text: The raw input string to analyse.
+
+    Returns:
+        A ``(entities_dict, tagged_sentence)`` tuple. ``entities_dict`` maps
+        each predicted label to the list of token records carrying that label
+        (keys: entity, score, index, word, start, end). ``tagged_sentence`` is
+        the HTML produced by ``tag_sentence`` for the same text.
+    """
+    entities_dict = {}
+    for entity in ner_pipeline(text):
+        entities_dict.setdefault(entity['entity'], []).append({
+            "entity": entity['entity'],
+            # The pipeline may report the score as a NumPy scalar; a plain
+            # float is shown as a JSON number rather than an opaque object.
+            "score": float(entity['score']),
+            "index": entity['index'],
+            "word": entity['word'],
+            "start": entity['start'],
+            "end": entity['end']
+        })
+    return entities_dict, tag_sentence(text, entities_dict)
+# Streamlit UI
+st.title("CyNER 2.0 - Named Entity Recognition")
+st.write("Enter text to get named entity recognition results.")
+# Show the hint as a placeholder instead of an initial value, so an untouched
+# box is empty and triggers the warning below rather than being analysed.
+input_text = st.text_area("Input Text", placeholder="Type your text here...")
+if st.button("Analyze"):
+    if input_text.strip():
+        entities_dict, tagged_sentence = perform_ner(input_text)
+        # Display results
+        st.subheader("Tagged Entities")
+        # The tagged sentence contains <span> markup, so it must be rendered as HTML.
+        st.markdown(tagged_sentence, unsafe_allow_html=True)
+        st.subheader("Entities and Details")
+        st.json(entities_dict)
+    else:
+        st.warning("Please enter some text for analysis.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ transformers==4.12.2
2	+ torch