Update app.py
app.py
CHANGED
@@ -1,3 +1,5 @@
+# Refactored Streamlit App for Setswana NER using HuggingFace Models
+
 import streamlit as st
 from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
 import pandas as pd
@@ -8,13 +10,7 @@ st.set_page_config(layout="wide")
 
 # -------------------- UI HEADER --------------------
 st.image("logo_transparent_small.png", use_column_width="always")
-st.title("Demo for Setswana NER")
-st.markdown("""
-A Setswana Language Model fine-tuned on MasakhaNER-2 for Named Entity Recognition.
-
-**Co-authors**: Vukosi Marivate (@vukosi), Moseli Mots'Oehli (@MoseliMotsoehli), Valencia Wagner, Richard Lastrucci, and Isheanesu Dzingirai
-**Model link**: [arXiv:2310.09141](https://arxiv.org/abs/2310.09141)
-""")
+st.title("Demo for Setswana PuoBERTa NER Model")
 
 # -------------------- MODEL SELECTION --------------------
 model_list = ['dsfsi/PuoBERTa-NER']
@@ -42,12 +38,14 @@ def get_input_text():
 
 input_text = get_input_text()
 
+# -------------------- MODEL LOADING --------------------
 @st.cache_resource
 def load_ner_pipeline(model_checkpoint, strategy):
     tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
     model = AutoModelForTokenClassification.from_pretrained(model_checkpoint)
     return pipeline("token-classification", model=model, tokenizer=tokenizer, aggregation_strategy=strategy)
 
+# -------------------- ENTITY MERGE --------------------
 def merge_entities(output):
     merged = []
     for i, ent in enumerate(output):
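The hunk above only adds section comments around `load_ner_pipeline`, which wraps a Hugging Face token-classification pipeline behind `st.cache_resource`. For reference, the same pipeline can be exercised outside Streamlit. A minimal sketch, assuming the `dsfsi/PuoBERTa-NER` checkpoint from `model_list` is the selected `model_checkpoint` and `"simple"` is the chosen aggregation strategy (the UI-selected value is not shown in this diff):

```python
# Minimal sketch: running the same token-classification pipeline outside Streamlit.
# Assumes the dsfsi/PuoBERTa-NER checkpoint is reachable on the Hugging Face Hub
# and that "simple" is the chosen aggregation strategy.
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

checkpoint = "dsfsi/PuoBERTa-NER"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForTokenClassification.from_pretrained(checkpoint)
ner = pipeline("token-classification", model=model, tokenizer=tokenizer,
               aggregation_strategy="simple")

text = "..."  # any Setswana sentence; placeholder here
for ent in ner(text):
    # With an aggregation strategy set, each result carries entity_group, word,
    # character offsets (start/end) and a confidence score.
    print(ent["entity_group"], ent["word"], ent["start"], ent["end"], f"{ent['score']:.3f}")
```

`@st.cache_resource` keeps the tokenizer and model loaded once per process instead of reloading them on every Streamlit rerun.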
@@ -58,6 +56,7 @@ def merge_entities(output):
         merged.append(ent)
     return merged
 
+# -------------------- RUN NER --------------------
 if st.button("Run NER") and input_text.strip():
     with st.spinner("Running NER..."):
         ner = load_ner_pipeline(model_checkpoint, aggregation_strategy)
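The body of `merge_entities` falls outside the hunks in this diff; only `merged = []`, the loop header, `merged.append(ent)` and `return merged` are visible. As an illustration only (not the app's actual implementation), a helper of this shape typically folds adjacent spans that share an `entity_group` into one span:

```python
# Illustrative sketch only -- the real merge_entities body is not part of this diff.
# Folds adjacent pipeline spans that share an entity_group into a single span.
def merge_entities(output):
    merged = []
    for i, ent in enumerate(output):
        if (i > 0
                and ent["entity_group"] == merged[-1]["entity_group"]
                and ent["start"] <= merged[-1]["end"] + 1):
            # Extend the previous span rather than appending a new one.
            merged[-1]["word"] += " " + ent["word"]
            merged[-1]["end"] = ent["end"]
            merged[-1]["score"] = min(merged[-1]["score"], ent["score"])
        else:
            merged.append(ent)
    return merged
```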
@@ -69,7 +68,6 @@ if st.button("Run NER") and input_text.strip():
         st.subheader("Recognized Entities")
         st.dataframe(df)
 
-        # -------------------- SPACY STYLE VISUAL --------------------
         spacy_display = {"text": input_text, "ents": [], "title": None}
         for ent in entities:
             label = ent["entity_group"]
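The statement that builds `html` from `spacy_display` also sits outside the hunks shown. The dict's shape (`text`, `ents`, `title`) matches spaCy's displacy manual-rendering input, so it is presumably produced along these lines (an illustrative sketch, not necessarily the app's exact call):

```python
# Illustrative sketch: rendering a manually-built entity dict with spaCy's displacy.
# Mirrors the spacy_display structure above; start/end are character offsets into text.
from spacy import displacy

spacy_display = {
    "text": "Kitso works in Gaborone.",
    "ents": [
        {"start": 0, "end": 5, "label": "PER"},
        {"start": 15, "end": 23, "label": "LOC"},
    ],
    "title": None,
}

# manual=True makes displacy treat the dicts as pre-computed entities rather than a Doc.
html = displacy.render([spacy_display], style="ent", manual=True)
```

In the app, the returned markup is wrapped in `styled_html` and passed to `st.markdown(..., unsafe_allow_html=True)`, as the context lines of the next hunk show.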
@@ -81,4 +79,26 @@ if st.button("Run NER") and input_text.strip():
         styled_html = f"<style>mark.entity {{ display: inline-block; }}</style><div style='overflow-x:auto;'>{html}</div>"
         st.markdown(styled_html, unsafe_allow_html=True)
     else:
-        st.info("No entities recognized in the input.")
+        st.info("No entities recognized in the input.")
+
+# -------------------- AUTHORS, CITATION & FEEDBACK --------------------
+st.markdown("""
+---
+### 📚 Authors & Citation
+
+**Authors**
+Vukosi Marivate, Moseli Mots'Oehli, Valencia Wagner, Richard Lastrucci, Isheanesu Dzingirai
+
+**Citation**
+```bibtex
+@inproceedings{marivate2023puoberta,
+  title = {PuoBERTa: Training and evaluation of a curated language model for Setswana},
+  author = {Vukosi Marivate and Moseli Mots'Oehli and Valencia Wagner and Richard Lastrucci and Isheanesu Dzingirai},
+  year = {2023},
+  booktitle = {Artificial Intelligence Research. SACAIR 2023. Communications in Computer and Information Science},
+  url = {https://link.springer.com/chapter/10.1007/978-3-031-49002-6_17},
+  keywords = {NLP},
+  preprint_url = {https://arxiv.org/abs/2310.09141},
+  dataset_url = {https://github.com/dsfsi/PuoBERTa},
+  software_url = {https://huggingface.co/dsfsi/PuoBERTa}
+}""")
|