|  | import streamlit as st | 
					
						
						|  | import pandas as pd | 
					
						
						|  | import streamlit.components.v1 as stc | 
					
						
						|  | import docx2txt | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | import nltk | 
					
						
						|  | from nltk.tokenize import word_tokenize | 
					
						
						|  | from nltk.tag import pos_tag | 
					
						
						|  | from nltk.stem import WordNetLemmatizer | 
					
						
						|  | from nltk.corpus import stopwords | 
					
						
						|  |  | 
					
						
						|  | from nltk.tag import StanfordNERTagger | 
					
						
						|  |  | 
					
						
						|  | from collections import Counter | 
					
						
						|  |  | 
					
						
						|  | from textblob import TextBlob | 
					
						
						|  | import seaborn as sns | 
					
						
						|  | import matplotlib.pyplot as plt | 
					
						
						|  |  | 
					
						
						|  | from wordcloud import WordCloud | 
					
						
						|  |  | 
					
						
						|  | import base64 | 
					
						
						|  | import time | 
					
						
						|  | from app_utils import * | 
					
						
						|  |  | 
					
						
# Static HTML/CSS snippet rendered through streamlit.components.v1.html()
# to draw the green title banner at the top of the page.
HTML_BANNER = """
<div style="background-color:green;padding:10px;border-radius:10px">
<h1 style="color:white;text-align:center;">Text Analysis App </h1>
</div>
"""
					
						
						|  |  | 
					
						
						|  | def text_analysis(): | 
					
						
						|  | stc.html(HTML_BANNER) | 
					
						
						|  | menu=['Text-analysis','Upload_Files'] | 
					
						
						|  |  | 
					
						
						|  | choice=st.sidebar.selectbox('Menu',menu) | 
					
						
						|  | if choice=='Text-analysis': | 
					
						
						|  | st.subheader('Analyse Text') | 
					
						
						|  | text=st.text_area("Enter the text to anlayze") | 
					
						
						|  | if (st.button("Analyze")): | 
					
						
						|  | st.success("Success") | 
					
						
						|  | with st.expander('Original Text'): | 
					
						
						|  | st.write(text) | 
					
						
						|  | with st.expander('Text Analysis'): | 
					
						
						|  | token_analysis=nlp_analysis(text) | 
					
						
						|  | st.dataframe(token_analysis) | 
					
						
						|  | with st.expander('Entitites'): | 
					
						
						|  | entity_result=find_entities(text) | 
					
						
						|  | stc.html(entity_result, height=100, scrolling=True) | 
					
						
						|  |  | 
					
						
						|  | col1,col2=st.columns(2) | 
					
						
						|  |  | 
					
						
						|  | with col1: | 
					
						
						|  |  | 
					
						
						|  | with st.expander("Word Stats"): | 
					
						
						|  | st.info("Word Statistics") | 
					
						
						|  | docx = nt.TextFrame(text) | 
					
						
						|  | st.write(docx.word_stats()) | 
					
						
						|  |  | 
					
						
						|  | with st.expander("Top keywords"): | 
					
						
						|  | keywords=get_most_common_tokens(text) | 
					
						
						|  | st.write(keywords) | 
					
						
						|  |  | 
					
						
						|  | with st.expander('Tagged Keywords'): | 
					
						
						|  | data= pos_tag(text) | 
					
						
						|  | st.dataframe(data) | 
					
						
						|  | visualize_tags=tag_visualize(data) | 
					
						
						|  | stc.html(visualize_tags,scrolling=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with st.expander("Sentiment"): | 
					
						
						|  | sent_result=get_semantics(text) | 
					
						
						|  | st.write(sent_result) | 
					
						
						|  |  | 
					
						
						|  | with col2: | 
					
						
						|  |  | 
					
						
						|  | with st.expander("Plot word freq"): | 
					
						
						|  | try: | 
					
						
						|  | fig, ax = plt.subplots() | 
					
						
						|  | most_common_tokens = dict(token_analysis["Token"].value_counts()) | 
					
						
						|  | sns.countplot(data=token_analysis[token_analysis["Token"].isin(most_common_tokens)], x="Token", ax=ax) | 
					
						
						|  | ax.set_xlabel('PoS') | 
					
						
						|  | ax.set_ylabel('Frequency') | 
					
						
						|  | ax.tick_params(axis='x' , rotation=45) | 
					
						
						|  | st.pyplot(fig) | 
					
						
						|  | except: | 
					
						
						|  | st.warning('Insufficient data') | 
					
						
						|  |  | 
					
						
						|  | with st.expander("Plot part of speech"): | 
					
						
						|  | try: | 
					
						
						|  | fig, ax = plt.subplots() | 
					
						
						|  | most_common_tokens = dict(token_analysis["Position"].value_counts()) | 
					
						
						|  | sns.countplot(data=token_analysis[token_analysis["Position"].isin(most_common_tokens)], x="Position", ax=ax) | 
					
						
						|  | ax.set_xlabel('PoS') | 
					
						
						|  | ax.set_ylabel('Frequency') | 
					
						
						|  | ax.tick_params(axis='x' , rotation=45) | 
					
						
						|  | st.pyplot(fig) | 
					
						
						|  | except: | 
					
						
						|  | st.warning('Insufficient data') | 
					
						
						|  |  | 
					
						
						|  | with st.expander("Plot word cloud"): | 
					
						
						|  | try: | 
					
						
						|  | plot_wordcloud(text) | 
					
						
						|  | except: | 
					
						
						|  | st.warning('Insufficient data') | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with st.expander('Download Results'): | 
					
						
						|  | file_download(token_analysis) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | elif choice == 'Upload_Files': | 
					
						
						|  | text_file = st.file_uploader('Upload Files', type=['docx']) | 
					
						
						|  | if text_file is not None: | 
					
						
						|  | if text_file.type == 'text/plain': | 
					
						
						|  | text = str(text_file.read(), "utf-8") | 
					
						
						|  | else: | 
					
						
						|  | text = docx2txt.process(text_file) | 
					
						
						|  |  | 
					
						
						|  | if (st.button("Analyze")): | 
					
						
						|  | with st.expander('Original Text'): | 
					
						
						|  | st.write(text) | 
					
						
						|  | with st.expander('Text Analysis'): | 
					
						
						|  | token_analysis = nlp_analysis(text) | 
					
						
						|  | st.dataframe(token_analysis) | 
					
						
						|  | with st.expander('Entities'): | 
					
						
						|  | entity_result = find_entities(text) | 
					
						
						|  | stc.html(entity_result, height=100, scrolling=True) | 
					
						
						|  |  | 
					
						
						|  | col1, col2 = st.columns(2) | 
					
						
						|  |  | 
					
						
						|  | with col1: | 
					
						
						|  | with st.expander("Word Stats"): | 
					
						
						|  | st.info("Word Statistics") | 
					
						
						|  | docx = nt.TextFrame(text) | 
					
						
						|  | st.write(docx.word_stats()) | 
					
						
						|  |  | 
					
						
						|  | with st.expander("Top keywords"): | 
					
						
						|  | keywords = get_most_common_tokens(text) | 
					
						
						|  | st.write(keywords) | 
					
						
						|  |  | 
					
						
						|  | with st.expander("Sentiment"): | 
					
						
						|  | sent_result = get_semantics(text) | 
					
						
						|  | st.write(sent_result) | 
					
						
						|  |  | 
					
						
						|  | with col2: | 
					
						
						|  | with st.expander("Plot word freq"): | 
					
						
						|  | fig, ax = plt.subplots() | 
					
						
						|  | num_tokens = 10 | 
					
						
						|  | most_common_tokens = dict(token_analysis["Token"].value_counts().head(num_tokens)) | 
					
						
						|  | sns.countplot(data=token_analysis[token_analysis["Token"].isin(most_common_tokens)], x="Token", ax=ax) | 
					
						
						|  | ax.set_xlabel('Token') | 
					
						
						|  | ax.set_ylabel('Frequency') | 
					
						
						|  | ax.tick_params(axis='x', rotation=45) | 
					
						
						|  | st.pyplot(fig) | 
					
						
						|  |  | 
					
						
						|  | with st.expander("Plot part of speech"): | 
					
						
						|  | fig, ax = plt.subplots() | 
					
						
						|  | most_common_tokens = dict(token_analysis["Position"].value_counts()) | 
					
						
						|  | sns.countplot(data=token_analysis[token_analysis["Position"].isin(most_common_tokens)], x="Position", ax=ax) | 
					
						
						|  | ax.set_xlabel('PoS') | 
					
						
						|  | ax.set_ylabel('Frequency') | 
					
						
						|  | ax.tick_params(axis='x', rotation=45) | 
					
						
						|  | st.pyplot(fig) | 
					
						
						|  |  | 
					
						
						|  | with st.expander("Plot word cloud"): | 
					
						
						|  | plot_wordcloud(text) | 
					
						
						|  |  | 
					
						
						|  | with st.expander('Download Results'): | 
					
						
						|  | file_download(token_analysis) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  |