vukosi committed on
Commit
4948d8f
·
verified ·
1 Parent(s): 295300a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -9
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import streamlit as st
2
  from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
3
  import pandas as pd
@@ -8,13 +10,7 @@ st.set_page_config(layout="wide")
8
 
9
  # -------------------- UI HEADER --------------------
10
  st.image("logo_transparent_small.png", use_column_width="always")
11
- st.title("Demo for Setswana NER Models")
12
- st.markdown("""
13
- A Setswana Language Model fine-tuned on MasakhaNER-2 for Named Entity Recognition.
14
-
15
- **Co-authors**: Vukosi Marivate (@vukosi), Moseli Mots'Oehli (@MoseliMotsoehli), Valencia Wagner, Richard Lastrucci, and Isheanesu Dzingirai
16
- **Model link**: [arXiv:2310.09141](https://arxiv.org/abs/2310.09141)
17
- """)
18
 
19
  # -------------------- MODEL SELECTION --------------------
20
  model_list = ['dsfsi/PuoBERTa-NER']
@@ -42,12 +38,14 @@ def get_input_text():
42
 
43
  input_text = get_input_text()
44
 
 
45
  @st.cache_resource
46
  def load_ner_pipeline(model_checkpoint, strategy):
47
  tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
48
  model = AutoModelForTokenClassification.from_pretrained(model_checkpoint)
49
  return pipeline("token-classification", model=model, tokenizer=tokenizer, aggregation_strategy=strategy)
50
 
 
51
  def merge_entities(output):
52
  merged = []
53
  for i, ent in enumerate(output):
@@ -58,6 +56,7 @@ def merge_entities(output):
58
  merged.append(ent)
59
  return merged
60
 
 
61
  if st.button("Run NER") and input_text.strip():
62
  with st.spinner("Running NER..."):
63
  ner = load_ner_pipeline(model_checkpoint, aggregation_strategy)
@@ -69,7 +68,6 @@ if st.button("Run NER") and input_text.strip():
69
  st.subheader("Recognized Entities")
70
  st.dataframe(df)
71
 
72
- # -------------------- SPACY STYLE VISUAL --------------------
73
  spacy_display = {"text": input_text, "ents": [], "title": None}
74
  for ent in entities:
75
  label = ent["entity_group"]
@@ -81,4 +79,26 @@ if st.button("Run NER") and input_text.strip():
81
  styled_html = f"<style>mark.entity {{ display: inline-block; }}</style><div style='overflow-x:auto;'>{html}</div>"
82
  st.markdown(styled_html, unsafe_allow_html=True)
83
  else:
84
- st.info("No entities recognized in the input.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Refactored Streamlit App for Setswana NER using HuggingFace Models
2
+
3
  import streamlit as st
4
  from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
5
  import pandas as pd
 
10
 
11
  # -------------------- UI HEADER --------------------
12
  st.image("logo_transparent_small.png", use_column_width="always")
13
+ st.title("Demo for Setswana PuoBERTa NER Model")
 
 
 
 
 
 
14
 
15
  # -------------------- MODEL SELECTION --------------------
16
  model_list = ['dsfsi/PuoBERTa-NER']
 
38
 
39
  input_text = get_input_text()
40
 
41
+ # -------------------- MODEL LOADING --------------------
42
  @st.cache_resource
43
  def load_ner_pipeline(model_checkpoint, strategy):
44
  tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
45
  model = AutoModelForTokenClassification.from_pretrained(model_checkpoint)
46
  return pipeline("token-classification", model=model, tokenizer=tokenizer, aggregation_strategy=strategy)
47
 
48
+ # -------------------- ENTITY MERGE --------------------
49
  def merge_entities(output):
50
  merged = []
51
  for i, ent in enumerate(output):
 
56
  merged.append(ent)
57
  return merged
58
 
59
+ # -------------------- RUN NER --------------------
60
  if st.button("Run NER") and input_text.strip():
61
  with st.spinner("Running NER..."):
62
  ner = load_ner_pipeline(model_checkpoint, aggregation_strategy)
 
68
  st.subheader("Recognized Entities")
69
  st.dataframe(df)
70
 
 
71
  spacy_display = {"text": input_text, "ents": [], "title": None}
72
  for ent in entities:
73
  label = ent["entity_group"]
 
79
  styled_html = f"<style>mark.entity {{ display: inline-block; }}</style><div style='overflow-x:auto;'>{html}</div>"
80
  st.markdown(styled_html, unsafe_allow_html=True)
81
  else:
82
+ st.info("No entities recognized in the input.")
83
+
84
+ # -------------------- AUTHORS, CITATION & FEEDBACK --------------------
85
+ st.markdown("""
86
+ ---
87
+ ### 📚 Authors & Citation
88
+
89
+ **Authors**
90
+ Vukosi Marivate, Moseli Mots'Oehli, Valencia Wagner, Richard Lastrucci, Isheanesu Dzingirai
91
+
92
+ **Citation**
93
+ ```bibtex
94
+ @inproceedings{marivate2023puoberta,
95
+ title = {PuoBERTa: Training and evaluation of a curated language model for Setswana},
96
+ author = {Vukosi Marivate and Moseli Mots'Oehli and Valencia Wagner and Richard Lastrucci and Isheanesu Dzingirai},
97
+ year = {2023},
98
+ booktitle= {Artificial Intelligence Research. SACAIR 2023. Communications in Computer and Information Science},
99
+ url= {https://link.springer.com/chapter/10.1007/978-3-031-49002-6_17},
100
+ keywords = {NLP},
101
+ preprint_url = {https://arxiv.org/abs/2310.09141},
102
+ dataset_url = {https://github.com/dsfsi/PuoBERTa},
103
+ software_url = {https://huggingface.co/dsfsi/PuoBERTa}
104
+ }""")