Spaces:
Runtime error
Runtime error
| import random | |
| import spacy | |
| import srsly | |
| import streamlit as st | |
# Transformer-based English pipeline that supplies the entity annotations.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# load presumably runs on every rerun -- consider caching it; confirm against
# the Streamlit version in use.
nlp = spacy.load("en_core_web_trf")

# Load pre-processed grants from disk.
grants = list(srsly.read_jsonl("data/processed/entities.jsonl"))

# displaCy "ent" settings: only location-type labels are shown, and both
# labels share one highlight colour.
colors = dict.fromkeys(("GPE", "LOC"), "#5cff84")
options = {"ents": list(colors), "colors": colors}

# Scrollable, bordered container that the rendered entity markup is placed in.
HTML_WRAPPER = (
    '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
    'border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">'
    "{}</div>"
)
def render_entities(doc, colors: dict, options: dict) -> str:
    """
    Render the entities of a spaCy doc as displaCy HTML.

    Args:
        doc: The processed spaCy ``Doc`` whose entities are rendered.
        colors: Mapping of entity label to highlight colour. Entries here
            take precedence over any ``"colors"`` entry already present in
            ``options``.
        options: displaCy options for the ``ent`` style (for example
            ``"ents"`` and ``"colors"``). The dict is not modified.

    Returns:
        The rendered markup with every newline replaced by a space.
    """
    # Import the submodule explicitly rather than relying on it being
    # reachable as an attribute of the top-level ``spacy`` package.
    from spacy import displacy

    # ``colors`` used to be accepted but silently ignored. Merge it into a
    # copy of the options so the caller's dicts are left untouched.
    merged_colors = {**(options.get("colors") or {}), **(colors or {})}
    render_options = {**options, "colors": merged_colors}

    html = displacy.render(doc, style="ent", options=render_options)
    return html.replace("\n", " ")
def show_example(text):
    """
    Write the highlighted entities to the Streamlit page and return ``text``.

    NOTE: the document that gets rendered is the module-level ``doc``
    variable, not something derived from ``text``; callers must assign
    ``doc`` before calling this function. ``text`` is only passed through
    as the return value.
    """
    markup = render_entities(doc, colors, options)
    wrapped = HTML_WRAPPER.format(markup)
    # Raw HTML is required so the entity markup is rendered, not escaped.
    st.write(wrapped, unsafe_allow_html=True)
    return text
# Page title.
# NOTE(review): the trailing "πππ" looks like emoji lost to a mis-encoding;
# the original characters cannot be recovered from this file -- restore them
# from the upstream source if available.
st.header("Location Recognition Demo πππ")

# Sidebar: background information about the demo and the entity types shown.
st.sidebar.header("Information ℹ️ ")
st.sidebar.markdown(
    """
This example application accompanies the blog post: [Extracting useful information from documents with Named Entity Recognition](https://medium.com/@reproducible/extracting-useful-information-from-documents-with-named-entity-recognition-4e009b60a8c).

It uses a pre-trained Named Entity Recognition (NER) model from the [spaCy](https://spacy.io/) library to extract locations from your own examples, or a sample of grant applications from The Wellcome Trust.

The application will extract the following types of location entity:

* __GPE__: Geopolitical entities (countries, cities, states)
* __LOC__: Locations (mountains, rivers, lakes)

This model will inevitably make some mistakes; it was trained on a large generic corpus of text, and the Wellcome Trust grant applications come from a very specific domain. We could improve this model by fine-tuning it on data from this domain.
"""
)
# The text area is the same either way; only its initial content differs:
# a randomly chosen grant when the button was pressed, a placeholder otherwise.
if st.button("Show Wellcome example", key="text"):
    sample = random.choice(grants)
    initial_text = sample["text"]
else:
    initial_text = "Enter your text here"

text = st.text_area(
    "Add your own text or click the button to see a Wellcome example",
    value=initial_text,
    height=200,
    help="Enter your own text and press CTRL + ENTER to search for entities",
)

# ``doc`` has to remain a module-level name: show_example() reads it.
doc = nlp(text)
show_example(text)
# Attribution and licence notice for the Wellcome Trust example texts.
st.markdown(
    "Examples from The Wellcome Trust are taken from data that are published openly at [360 Giving](https://data.threesixtygiving.org/). They are published under a [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) license."
)