Update pages/Basics Of NLP.py
Browse files- pages/Basics Of NLP.py +1 -37
pages/Basics Of NLP.py
CHANGED
|
@@ -12,8 +12,6 @@ nltk.download("stopwords")
|
|
| 12 |
nltk.download("wordnet")
|
| 13 |
nltk.download("vader_lexicon")
|
| 14 |
|
| 15 |
-
# Load spaCy model
|
| 16 |
-
nlp = spacy.load("en_core_web_sm")
|
| 17 |
|
| 18 |
# Streamlit app configuration
|
| 19 |
st.set_page_config(page_title="NLP Basics", page_icon="🤖", layout="wide")
|
|
@@ -40,7 +38,7 @@ if not text_input.strip():
|
|
| 40 |
st.warning("Please enter some text to explore NLP concepts.")
|
| 41 |
|
| 42 |
# NLP Processing Sections
|
| 43 |
-
tabs = st.tabs(["Tokenization", "Stopwords", "Lemmatization & Stemming", "Bag of Words (BoW)", "TF-IDF"
|
| 44 |
|
| 45 |
# Tokenization
|
| 46 |
with tabs[0]:
|
|
@@ -103,40 +101,6 @@ with tabs[4]:
|
|
| 103 |
st.write("**TF-IDF Matrix:**", tfidf_matrix.toarray())
|
| 104 |
st.write("**Feature Names:**", tfidf_vectorizer.get_feature_names_out())
|
| 105 |
|
| 106 |
-
# Named Entity Recognition (NER)
|
| 107 |
-
with tabs[5]:
|
| 108 |
-
st.header("🏷️ Named Entity Recognition (NER)")
|
| 109 |
-
if text_input.strip():
|
| 110 |
-
doc = nlp(text_input)
|
| 111 |
-
entities = [(ent.text, ent.label_) for ent in doc.ents]
|
| 112 |
-
st.write("**Named Entities:**", entities)
|
| 113 |
-
|
| 114 |
-
# Sentiment Analysis
|
| 115 |
-
with tabs[6]:
|
| 116 |
-
st.header("😊 Sentiment Analysis")
|
| 117 |
-
if text_input.strip():
|
| 118 |
-
sia = SentimentIntensityAnalyzer()
|
| 119 |
-
sentiment_scores = sia.polarity_scores(text_input)
|
| 120 |
-
st.write("**Sentiment Scores:**", sentiment_scores)
|
| 121 |
-
|
| 122 |
-
# Text Summarization
|
| 123 |
-
with tabs[7]:
|
| 124 |
-
st.header("📝 Text Summarization")
|
| 125 |
-
if text_input.strip():
|
| 126 |
-
parser = PlaintextParser.from_string(text_input, Tokenizer("english"))
|
| 127 |
-
summarizer = LsaSummarizer()
|
| 128 |
-
summary = summarizer(parser.document, 3) # Summarize into 3 sentences
|
| 129 |
-
st.write("**Summary:**", " ".join([str(s) for s in summary]))
|
| 130 |
-
|
| 131 |
-
# Word Cloud Visualization
|
| 132 |
-
with tabs[8]:
|
| 133 |
-
st.header("☁️ Word Cloud Visualization")
|
| 134 |
-
if text_input.strip():
|
| 135 |
-
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text_input)
|
| 136 |
-
fig, ax = plt.subplots()
|
| 137 |
-
ax.imshow(wordcloud, interpolation='bilinear')
|
| 138 |
-
ax.axis("off")
|
| 139 |
-
st.pyplot(fig)
|
| 140 |
|
| 141 |
# Footer
|
| 142 |
st.markdown("---")
|
|
|
|
| 12 |
nltk.download("wordnet")
|
| 13 |
nltk.download("vader_lexicon")
|
| 14 |
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Streamlit app configuration
|
| 17 |
st.set_page_config(page_title="NLP Basics", page_icon="🤖", layout="wide")
|
|
|
|
| 38 |
st.warning("Please enter some text to explore NLP concepts.")
|
| 39 |
|
| 40 |
# NLP Processing Sections
|
| 41 |
+
tabs = st.tabs(["Tokenization", "Stopwords", "Lemmatization & Stemming", "Bag of Words (BoW)", "TF-IDF"])
|
| 42 |
|
| 43 |
# Tokenization
|
| 44 |
with tabs[0]:
|
|
|
|
| 101 |
st.write("**TF-IDF Matrix:**", tfidf_matrix.toarray())
|
| 102 |
st.write("**Feature Names:**", tfidf_vectorizer.get_feature_names_out())
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
# Footer
|
| 106 |
st.markdown("---")
|