Pranava Kailash committed on
Commit
6c7907e
·
1 Parent(s): 30b1605

Deploy CyNER 2.0 NER App to Hugging Face V1.0

Browse files
Files changed (2) hide show
  1. app.py +87 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
3
+
4
# Model setup: everything below runs at import time, so the checkpoint is
# fetched/loaded as soon as the script starts.
path_to_checkpoint = 'PranavaKailash/CyNER-2.0-DeBERTa-v3-base'

# NOTE(review): `max_length` is forwarded to the tokenizer as an extra keyword
# argument; confirm it has the intended effect (the documented knob for the
# tokenizer's length limit appears to be `model_max_length`).
tokenizer = AutoTokenizer.from_pretrained(
    path_to_checkpoint,
    use_fast=True,
    max_length=768,
)
model = AutoModelForTokenClassification.from_pretrained(path_to_checkpoint)

# Token-classification pipeline shared by the functions below.
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
9
+
10
def tag_sentence(sentence, entities_dict):
    """
    Add HTML tags to entities for visualization.

    Args:
        sentence: The original text that the entity offsets refer to.
        entities_dict: Mapping of entity label -> list of token dicts. Each
            token dict must provide 'start', 'end' (character offsets into
            ``sentence``) and 'entity' (the predicted tag, e.g. 'B-Malware').

    Returns:
        An HTML string in which every entity span is wrapped in visible,
        blue ``<TAG>`` ... ``</TAG>`` markers. All text taken from
        ``sentence`` and all tag names are HTML-escaped, so the result can be
        rendered as raw HTML without the input being interpreted as markup.
    """
    # Local import keeps this function self-contained.
    from html import escape

    def _base_label(tag):
        # 'B-Malware' / 'I-Malware' -> 'Malware'; other tags are unchanged.
        return tag[2:] if tag.startswith(('B-', 'I-')) else tag

    # Flatten to (start, end, tag) ordered by start offset. Tokens without
    # offsets cannot be located in the sentence, so they are skipped instead
    # of making the sort fail on None.
    tokens = sorted(
        (
            (e['start'], e['end'], e['entity'])
            for ents in entities_dict.values()
            for e in ents
            if e.get('start') is not None and e.get('end') is not None
        ),
        key=lambda tok: tok[0],
    )

    # Merge continuation tokens into the span they continue. A token continues
    # the previous span when it is tagged 'I-', has the same base label, and
    # nothing but whitespace separates it from that span.
    spans = []  # each item: [start, end, base_label, first_tag]
    for start, end, tag in tokens:
        label = _base_label(tag)
        if spans:
            previous = spans[-1]
            gap = sentence[previous[1]:start]
            if tag.startswith('I-') and label == previous[2] and not gap.strip():
                previous[1] = max(previous[1], end)
                continue
        spans.append([start, end, label, tag])

    pieces = []
    cursor = 0
    for start, end, _, first_tag in spans:
        # A span is always displayed with a 'B-' marker, even if its first
        # token was tagged 'I-'.
        shown = 'B-' + first_tag[2:] if first_tag.startswith('I-') else first_tag
        shown = escape(shown, quote=False)
        pieces.append(escape(sentence[cursor:start], quote=False))
        pieces.append(
            f"<span style='color:blue'>&lt;{shown}&gt;</span>"
            f"{escape(sentence[start:end], quote=False)}"
            f"<span style='color:blue'>&lt;/{shown}&gt;</span>"
        )
        cursor = end

    pieces.append(escape(sentence[cursor:], quote=False))
    return "".join(pieces)
47
+
48
def perform_ner(text):
    """
    Run NER pipeline and prepare results for display.

    Args:
        text: The raw input text to analyse.

    Returns:
        A ``(entities_dict, tagged_sentence)`` tuple. ``entities_dict`` maps
        each predicted tag to the list of token records carrying that tag
        (keys: 'entity', 'score', 'index', 'word', 'start', 'end');
        ``tagged_sentence`` is the markup returned by ``tag_sentence`` for the
        same text.
    """
    entities_dict = {}
    for entity in ner_pipeline(text):
        entities_dict.setdefault(entity['entity'], []).append({
            "entity": entity['entity'],
            # The pipeline may return the score as a NumPy scalar, which does
            # not serialise cleanly to JSON; a plain float always does.
            "score": float(entity['score']),
            "index": entity['index'],
            "word": entity['word'],
            "start": entity['start'],
            "end": entity['end'],
        })

    tagged_sentence = tag_sentence(text, entities_dict)
    return entities_dict, tagged_sentence
69
+
70
# Streamlit UI: page header, input box, and the analysis trigger.
st.title("CyNER 2.0 - Named Entity Recognition")
st.write("Enter text to get named entity recognition results.")

# NOTE(review): the second argument is the box's initial value, so pressing
# "Analyze" without typing anything analyses this prompt text - confirm that
# is intended.
input_text = st.text_area("Input Text", "Type your text here...")

if st.button("Analyze"):
    if not input_text.strip():
        # Nothing but whitespace was submitted.
        st.warning("Please enter some text for analysis.")
    else:
        entities_dict, tagged_sentence = perform_ner(input_text)

        # Annotated text first, then the raw per-tag details.
        st.subheader("Tagged Entities")
        st.markdown(tagged_sentence, unsafe_allow_html=True)

        st.subheader("Entities and Details")
        st.json(entities_dict)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers==4.12.2
2
+ torch