Spaces:

GroNLP
/

agalma

Running

App Files Files Community

Mark7549 committed on May 22, 2024

Commit

dde99f4

1 Parent(s): 6640785

updated front end

Browse files

Files changed (3) hide show

.streamlit/config.toml +2 -0
app.py +266 -243
images/AGALMA_logo.png +0 -0

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [theme]
2	+ primaryColor="B8E52B"

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ import json
 from streamlit_tags import st_tags, st_tags_sidebar
-st.set_page_config(page_title="Ancient Greek Word2Vec", layout="centered")
 # Cache data
 @st.cache_data
@@ -46,6 +46,8 @@ models_for_word_dict = load_models_for_word_dict()
 lemma_counts = load_lemma_count_dict()
 # Set styles for menu
 styles = {
     "container": {"display": "flex", "justify-content": "center"},
@@ -70,289 +72,310 @@ styles = {
         "color": "#000"
     },
     "nav-link-selected": {
-        "background-color": "rgb(254, 74, 75)",
         "color": "white",
         "font-weight": "bold"
     },
     "icon": {"display": "None"}
 }
-# Horizontal menu
-active_tab = option_menu(None, ["Nearest neighbours", "Cosine similarity", "3D graph", 'Dictionary', 'About', 'FAQ'],
-    menu_icon="cast", default_index=0, orientation="horizontal", styles=styles)
-# Adding CSS style to remove list-style-type
-st.markdown("""
-<style>
-/* Define a class to remove list-style-type */
-.no-list-style {
-    list-style-type: none;
-}
-</style>
-""", unsafe_allow_html=True)
-# Nearest neighbours tab
-if active_tab == "Nearest neighbours":
-    # All models in a list
-    eligible_models = ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"]
-    all_models_words = load_all_models_words()
-    with st.container():
-        st.markdown("## Nearest Neighbours")
-        st.markdown('Here you can extract the nearest neighbours to a chosen lemma. Please select one or more time slices and the preferred number of nearest neighbours.')
-        target_word = st.multiselect("Enter a word", options=all_models_words, max_selections=1)
-        if len(target_word) > 0:
-            target_word = target_word[0]
-            eligible_models = models_for_word_dict[target_word]
-        models = st.multiselect(
-            "Select models to search for neighbours",
-            eligible_models
-            )
-        n = st.slider("Number of neighbours", 1, 50, 15)
-        nearest_neighbours_button = st.button("Find nearest neighbours")
-    if nearest_neighbours_button:
-        if validate_nearest_neighbours(target_word, n, models) == False:
-            st.error('Please fill in all fields')
-        else:
-            # Rewrite models to list of all loaded models
-            models = load_selected_models(models)
-            nearest_neighbours = get_nearest_neighbours(target_word, n, models)
-            all_dfs = []
-            # Create dataframes
-            for model in nearest_neighbours.keys():
-                st.write(f"### {model}")
-                df = pd.DataFrame(
-                    nearest_neighbours[model],
-                    columns = ['Word', 'Cosine Similarity']
                 )
-                # Add word occurences to dataframe
-                df['Occurences'] = df['Word'].apply(lambda x: lemma_counts[model][x])
-                all_dfs.append((model, df))
-                st.table(df)
-            # Store content in a temporary file
-            tmp_file = store_df_in_temp_file(all_dfs)
-            # Open the temporary file and read its content
-            with open(tmp_file, "rb") as file:
-                file_byte = file.read()
-                # Create download button
-                st.download_button(
-                    "Download results",
-                    data=file_byte,
-                    file_name = f'nearest_neighbours_{target_word}_TEST.xlsx',
-                    mime='application/octet-stream'
                     )
-# Cosine similarity tab
-elif active_tab == "Cosine similarity":
-    all_models_words = load_all_models_words()
-    with st.container():
-        eligible_models_1 = []
-        eligible_models_2 = []
-        st.markdown("## Cosine similarity")
-        st.markdown('Here you can extract the cosine similarity between two lemmas. Please select a time slice for each lemma. You can also calculate the cosine similarity between two vectors of the same lemma in different time slices.')
-        col1, col2 = st.columns(2)
-        col3, col4 = st.columns(2)
-        with col1:
-            word_1 = st.multiselect("Enter a word", placeholder="πατήρ", max_selections=1, options=all_models_words)
-            if len(word_1) > 0:
-                word_1 = word_1[0]
-                eligible_models_1 = models_for_word_dict[word_1]
-        with col2:
-            time_slice_1 = st.selectbox("Time slice word 1", options = eligible_models_1)
         with st.container():
-            with col3:
-                word_2 = st.multiselect("Enter a word", placeholder="μήτηρ", max_selections=1, options=all_models_words)
-                if len(word_2) > 0:
-                    word_2 = word_2[0]
-                    eligible_models_2 = models_for_word_dict[word_2]
-            with col4:
-                time_slice_2 = st.selectbox("Time slice word 2", eligible_models_2)
-        # Create button for calculating cosine similarity
-        cosine_similarity_button = st.button("Calculate cosine similarity")
-    # If the button is clicked, execute calculation
-    if cosine_similarity_button:
-        cosine_simularity_score = get_cosine_similarity(word_1, time_slice_1, word_2, time_slice_2)
-        st.write(cosine_simularity_score)
-# 3D graph tab
-elif active_tab == "3D graph":
-    st.markdown("## 3D graph")
-    st.markdown('Here you can generate a 3D representation of the semantic space surrounding a target lemma. Please choose the lemma and the time slice.')
-    col1, col2 = st.columns(2)
-    # Load compressed word list
-    all_models_words = load_all_models_words()
-    with st.container():
-        eligible_models = []
-        with col1:
-            word = st.multiselect("Enter a word", all_models_words, max_selections=1)
-            if len(word) > 0:
-                word = word[0]
-                eligible_models = models_for_word_dict[word]
-        with col2:
-            time_slice = st.selectbox("Time slice", eligible_models)
-        n = st.slider("Number of words", 1, 50, 15)
-        graph_button = st.button("Create 3D graph")
-        if graph_button:
-            time_slice_model = convert_time_name_to_model(time_slice)
-            nearest_neighbours_vectors = get_nearest_neighbours_vectors(word, time_slice_model, n)
-            fig, df = make_3d_plot_tSNE(nearest_neighbours_vectors, word, time_slice_model)
-            st.plotly_chart(fig)
-# Dictionary tab
-elif active_tab == "Dictionary":
-    with st.container():
-        st.markdown('## Dictionary')
-        st.markdown('Search a word in the Liddell-Scott-Jones dictionary (only Greek, no whitespaces).')
-        all_lemmas = load_all_lemmas()
-        # query_word = st.multiselect("Search a word in the LSJ dictionary", all_lemmas, max_selections=1)
-        query_tag = st_tags(label='',
-                            text = '',
-                            value = [],
-                            suggestions = all_lemmas,
-                            maxtags = 1,
-                            key = '1'
-                            )
-        # If a word has been selected by user
-        if query_tag:
-            st.write(f"### {query_tag[0]}")
-            # Display word information
-            if query_tag[0] in lemma_dict:
-                data = lemma_dict[query_tag[0]]
-            elif query_tag[0].capitalize() in lemma_dict: # Some words are capitalized in the dictionary
-                data = lemma_dict[query_tag[0].capitalize()]
-            else:
-                st.error("Word not found in dictionary")
-            # Put text in readable format
-            text = format_text(data)
-            st.markdown(format_text(data), unsafe_allow_html = True)
-            st.markdown("""
-                        <style>
-                        .tab {
-                            display: inline-block;
-                            margin-left: 4em;
-                        }
-                        .tr {
-                            font-weight: bold;
-                        }
-                        .list-class {
-                            list-style-type: none;
-                            margin-top: 1em;
-                        }
-                        .primary-indicator {
-                            font-weight: bold;
-                            font-size: x-large;
-                        }
-                        .secondary-indicator {
-                            font-weight: bold;
-                            font-size: large;
-                        }
-                        .tertiary-indicator {
-                            font-weight: bold;
-                            font-size: medium;
-                        }
-                        .quaternary-indicator {
-                            font-weight: bold;
-                            font-size: medium;
-                        }
-                        .primary-class {
-                            padding-left: 2em;
-                        }
-                        .secondary-class {
-                            padding-left: 4em;
-                        }
-                        .tertiary-class {
-                            padding-left: 6em;
-                        }
-                        .quaternary-class {
-                            padding-left: 8em;
-                        }
-                        </style>
-                        """, unsafe_allow_html=True)
-# About tab
-elif active_tab == "About":
     st.markdown("""
         ## About
         Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam nec purus nec nunc ultricies ultricies.
         """)
-elif active_tab == "FAQ":
-    st.markdown("""
-        ## FAQ
-        """)
-    with st.expander('''**Which models is this interface based on?**'''):
-        st.write(
-                "This interface is based on five language models. \
-                Language models are statistical models of language, \
-                which store statistical information about word co-occurrence during the training phase. \
-                During training they process a corpus of texts in the target language(s). \
-                Once trained, models can be used to extract information about the language \
-                (in this interface, we focus on the extraction of semantic information) or to perform specific linguistic tasks. \
-                The models on which this interface is based are Word Embedding models."
-                )
-    with st.expander('''**Which corpus was used to train the models?**'''):
-        st.write(
-            "The five models on which this interface is based were trained on five slices of the Diorisis Ancient Greek Corpus (Vatri & McGillivray 2018)."
-        )

 from streamlit_tags import st_tags, st_tags_sidebar
+st.set_page_config(page_title="ἄγαλμα | AGALMA", layout="centered")
 # Cache data
 @st.cache_data
 lemma_counts = load_lemma_count_dict()
 # Set styles for menu
 styles = {
     "container": {"display": "flex", "justify-content": "center"},
         "color": "#000"
     },
     "nav-link-selected": {
+        "background-color": "#B8E52B",
         "color": "white",
         "font-weight": "bold"
     },
     "icon": {"display": "None"}
 }
+with st.sidebar:
+    st.image('images/AGALMA_logo.png', width=250)
+    st.markdown('# ἄγαλμα | AGALMA')
+    selected = option_menu(None, ["App", "About", "FAQ", "License"],
+                           menu_icon="menu", default_index=0, orientation="vertical")
+if selected == "App":
+    # Horizontal menu
+    active_tab = option_menu(None, ["Nearest neighbours", "Cosine similarity", "3D graph", 'Dictionary'],
+        menu_icon="cast", default_index=0, orientation="horizontal", styles=styles)
+    # Adding CSS style to remove list-style-type
+    st.markdown("""
+    <style>
+    /* Define a class to remove list-style-type */
+    .no-list-style {
+        list-style-type: none;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+    # Nearest neighbours tab
+    if active_tab == "Nearest neighbours":
+        # All models in a list
+        eligible_models = ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"]
+        all_models_words = load_all_models_words()
+        with st.container():
+            st.markdown("## Nearest Neighbours")
+            st.markdown('Here you can extract the nearest neighbours to a chosen lemma. Please select one or more time slices and the preferred number of nearest neighbours.')
+            target_word = st.multiselect("Enter a word", options=all_models_words, max_selections=1)
+            if len(target_word) > 0:
+                target_word = target_word[0]
+                eligible_models = models_for_word_dict[target_word]
+            models = st.multiselect(
+                "Select models to search for neighbours",
+                eligible_models
                 )
+            n = st.slider("Number of neighbours", 1, 50, 15)
+            nearest_neighbours_button = st.button("Find nearest neighbours")
+        if nearest_neighbours_button:
+            if validate_nearest_neighbours(target_word, n, models) == False:
+                st.error('Please fill in all fields')
+            else:
+                # Rewrite models to list of all loaded models
+                models = load_selected_models(models)
+                nearest_neighbours = get_nearest_neighbours(target_word, n, models)
+                all_dfs = []
+                # Create dataframes
+                for model in nearest_neighbours.keys():
+                    st.write(f"### {model}")
+                    df = pd.DataFrame(
+                        nearest_neighbours[model],
+                        columns = ['Word', 'Cosine Similarity']
                     )
+                    # Add word occurences to dataframe
+                    df['Occurences'] = df['Word'].apply(lambda x: lemma_counts[model][x])
+                    all_dfs.append((model, df))
+                    st.table(df)
+                # Store content in a temporary file
+                tmp_file = store_df_in_temp_file(all_dfs)
+                # Open the temporary file and read its content
+                with open(tmp_file, "rb") as file:
+                    file_byte = file.read()
+                    # Create download button
+                    st.download_button(
+                        "Download results",
+                        data=file_byte,
+                        file_name = f'nearest_neighbours_{target_word}_TEST.xlsx',
+                        mime='application/octet-stream'
+                        )
+    # Cosine similarity tab
+    elif active_tab == "Cosine similarity":
+        all_models_words = load_all_models_words()
+        with st.container():
+            eligible_models_1 = []
+            eligible_models_2 = []
+            st.markdown("## Cosine similarity")
+            st.markdown('Here you can extract the cosine similarity between two lemmas. Please select a time slice for each lemma. You can also calculate the cosine similarity between two vectors of the same lemma in different time slices.')
+            col1, col2 = st.columns(2)
+            col3, col4 = st.columns(2)
+            with col1:
+                word_1 = st.multiselect("Enter a word", placeholder="πατήρ", max_selections=1, options=all_models_words)
+                if len(word_1) > 0:
+                    word_1 = word_1[0]
+                    eligible_models_1 = models_for_word_dict[word_1]
+            with col2:
+                time_slice_1 = st.selectbox("Time slice word 1", options = eligible_models_1)
+            with st.container():
+                with col3:
+                    word_2 = st.multiselect("Enter a word", placeholder="μήτηρ", max_selections=1, options=all_models_words)
+                    if len(word_2) > 0:
+                        word_2 = word_2[0]
+                        eligible_models_2 = models_for_word_dict[word_2]
+                with col4:
+                    time_slice_2 = st.selectbox("Time slice word 2", eligible_models_2)
+            # Create button for calculating cosine similarity
+            cosine_similarity_button = st.button("Calculate cosine similarity")
+        # If the button is clicked, execute calculation
+        if cosine_similarity_button:
+            cosine_simularity_score = get_cosine_similarity(word_1, time_slice_1, word_2, time_slice_2)
+            st.write(cosine_simularity_score)
+    # 3D graph tab
+    elif active_tab == "3D graph":
+        st.markdown("## 3D graph")
+        st.markdown('Here you can generate a 3D representation of the semantic space surrounding a target lemma. Please choose the lemma and the time slice.')
+        col1, col2 = st.columns(2)
+        # Load compressed word list
+        all_models_words = load_all_models_words()
         with st.container():
+            eligible_models = []
+            with col1:
+                word = st.multiselect("Enter a word", all_models_words, max_selections=1)
+                if len(word) > 0:
+                    word = word[0]
+                    eligible_models = models_for_word_dict[word]
+            with col2:
+                time_slice = st.selectbox("Time slice", eligible_models)
+            n = st.slider("Number of words", 1, 50, 15)
+            graph_button = st.button("Create 3D graph")
+            if graph_button:
+                time_slice_model = convert_time_name_to_model(time_slice)
+                nearest_neighbours_vectors = get_nearest_neighbours_vectors(word, time_slice_model, n)
+                fig, df = make_3d_plot_tSNE(nearest_neighbours_vectors, word, time_slice_model)
+                st.plotly_chart(fig)
+    # Dictionary tab
+    elif active_tab == "Dictionary":
+        with st.container():
+            st.markdown('## Dictionary')
+            st.markdown('Search a word in the Liddell-Scott-Jones dictionary (only Greek, no whitespaces).')
+            all_lemmas = load_all_lemmas()
+            # query_word = st.multiselect("Search a word in the LSJ dictionary", all_lemmas, max_selections=1)
+            query_tag = st_tags(label='',
+                                text = '',
+                                value = [],
+                                suggestions = all_lemmas,
+                                maxtags = 1,
+                                key = '1'
+                                )
+            # If a word has been selected by user
+            if query_tag:
+                st.write(f"### {query_tag[0]}")
+                # Display word information
+                if query_tag[0] in lemma_dict:
+                    data = lemma_dict[query_tag[0]]
+                elif query_tag[0].capitalize() in lemma_dict: # Some words are capitalized in the dictionary
+                    data = lemma_dict[query_tag[0].capitalize()]
+                else:
+                    st.error("Word not found in dictionary")
+                # Put text in readable format
+                text = format_text(data)
+                st.markdown(format_text(data), unsafe_allow_html = True)
+                st.markdown("""
+                            <style>
+                            .tab {
+                                display: inline-block;
+                                margin-left: 4em;
+                            }
+                            .tr {
+                                font-weight: bold;
+                            }
+                            .list-class {
+                                list-style-type: none;
+                                margin-top: 1em;
+                            }
+                            .primary-indicator {
+                                font-weight: bold;
+                                font-size: x-large;
+                            }
+                            .secondary-indicator {
+                                font-weight: bold;
+                                font-size: large;
+                            }
+                            .tertiary-indicator {
+                                font-weight: bold;
+                                font-size: medium;
+                            }
+                            .quaternary-indicator {
+                                font-weight: bold;
+                                font-size: medium;
+                            }
+                            .primary-class {
+                                padding-left: 2em;
+                            }
+                            .secondary-class {
+                                padding-left: 4em;
+                            }
+                            .tertiary-class {
+                                padding-left: 6em;
+                            }
+                            .quaternary-class {
+                                padding-left: 8em;
+                            }
+                            </style>
+                            """, unsafe_allow_html=True)
+    # About tab
+    elif active_tab == "About":
+        st.markdown("""
+            ## About
+            Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam nec purus nec nunc ultricies ultricies.
+            """)
+    elif active_tab == "FAQ":
+        st.markdown("""
+            ## FAQ
+            """)
+        with st.expander('''**Which models is this interface based on?**'''):
+            st.write(
+                    "This interface is based on five language models. \
+                    Language models are statistical models of language, \
+                    which store statistical information about word co-occurrence during the training phase. \
+                    During training they process a corpus of texts in the target language(s). \
+                    Once trained, models can be used to extract information about the language \
+                    (in this interface, we focus on the extraction of semantic information) or to perform specific linguistic tasks. \
+                    The models on which this interface is based are Word Embedding models."
+                    )
+        with st.expander('''**Which corpus was used to train the models?**'''):
+            st.write(
+                "The five models on which this interface is based were trained on five slices of the Diorisis Ancient Greek Corpus (Vatri & McGillivray 2018)."
+            )
+if selected == "About":
     st.markdown("""
         ## About
         Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam nec purus nec nunc ultricies ultricies.
         """)
+streamlit_style = """
+            <style>
+            html, body {
+                font-family: 'Helvetica';
+            }
+            </style>
+            """
+st.markdown(streamlit_style, unsafe_allow_html=True)

images/AGALMA_logo.png ADDED Viewed