Spaces:

Gopi9177
/

NLP

Running

App Files Files Community

Gopi9177 committed on Feb 10

Commit

71169a9

verified ·

1 Parent(s): 7dba2e2

Delete pages/Techniques of NLP.py

Browse files

Files changed (1) hide show

pages/Techniques of NLP.py +0 -142

pages/Techniques of NLP.py DELETED Viewed

@@ -1,142 +0,0 @@
-# Standard library
-import re
-import string
-# Third-party
-import matplotlib.pyplot as plt
-import nltk
-import numpy as np
-import pandas as pd
-import streamlit as st
-from nltk.corpus import stopwords
-from sklearn.datasets import fetch_20newsgroups
-from sklearn.decomposition import LatentDirichletAllocation, NMF
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import accuracy_score, classification_report
-from sklearn.model_selection import train_test_split
-from sklearn.naive_bayes import MultinomialNB
-from sklearn.svm import SVC
-from wordcloud import WordCloud
-# Download NLTK stopwords
-nltk.download('stopwords')
-stop_words = set(stopwords.words('english'))
-# Page title with emoji
-st.title('🌟 Traditional NLP Techniques 🌟')
-# Text Preprocessing
-st.header('🔹 1. Text Preprocessing')
-st.subheader('📖 Definition:')
-st.write("""
-Text preprocessing is the process of cleaning and preparing raw text for further analysis or modeling.
-This includes tasks such as removing unnecessary punctuation, converting text to lowercase,
-and handling special characters like emojis.
-""")
-# Interactive example for preprocessing
-text_input = st.text_area("✍️ Enter text to preprocess", "I love NLP! 😍 This is amazing.")
-col1, col2, col3, col4 = st.columns(4)
-with col1:
-    if st.button('✂️ Remove Punctuation'):
-        processed_text = ''.join([char for char in text_input if char not in string.punctuation])
-        st.success(f"Text without punctuation: {processed_text}")
-with col2:
-    if st.button('🔡 Convert to Lowercase'):
-        lowercase_text = text_input.lower()
-        st.success(f"Text in lowercase: {lowercase_text}")
-with col3:
-    if st.button('😊 Remove Emojis'):
-        processed_text_no_emoji = ''.join(char for char in text_input if char.isalnum() or char.isspace())
-        st.success(f"Text without emojis: {processed_text_no_emoji}")
-with col4:
-    if st.button('🚫 Remove Stopwords'):
-        words = text_input.split()
-        filtered_text = ' '.join([word for word in words if word.lower() not in stop_words])
-        st.success(f"Text without stopwords: {filtered_text}")
-# Text Vectorization
-st.header('📊 2. Text Vectorization')
-st.subheader('📖 Definition:')
-st.write("""
-Text vectorization converts text into numerical form so that machine learning models can process it.
-Two common techniques are Bag of Words (BoW) and Term Frequency-Inverse Document Frequency (TF-IDF).
-""")
-# Interactive example for vectorization
-vectorization_choice = st.selectbox('🛠 Choose vectorization technique:', ('Bag of Words', 'TF-IDF'))
-# Text for vectorization
-sample_text = ["I love programming.", "NLP is fun.", "Streamlit makes things easy!"]
-if st.button('🚀 Apply Vectorization'):
-    vectorizer = CountVectorizer() if vectorization_choice == 'Bag of Words' else TfidfVectorizer()
-    X = vectorizer.fit_transform(sample_text)
-    st.write(f"**Vectorized Representation:**\n{X.toarray()}")
-    st.write(f"**Feature names:** {vectorizer.get_feature_names_out()}")
-# Basic Machine Learning
-st.header('🤖 3. Basic Machine Learning')
-st.subheader('📖 Definition:')
-st.write("""
-Basic machine learning techniques, such as Naive Bayes, Logistic Regression, and Support Vector Machines (SVM),
-are commonly used for text classification tasks.
-""")
-# Load dataset
-newsgroups = fetch_20newsgroups(subset='train')
-X_train, X_test, y_train, y_test = train_test_split(newsgroups.data, newsgroups.target, test_size=0.3)
-model_choice = st.selectbox('🤔 Choose machine learning model for text classification:',
-                            ('Naive Bayes', 'Logistic Regression', 'SVM', 'Random Forest'))
-# Vectorization for classification
-vectorizer = TfidfVectorizer()
-X_train_vec = vectorizer.fit_transform(X_train)
-X_test_vec = vectorizer.transform(X_test)
-if st.button('🎯 Train Model'):
-    model = {'Naive Bayes': MultinomialNB(), 'Logistic Regression': LogisticRegression(max_iter=1000),
-             'SVM': SVC(), 'Random Forest': RandomForestClassifier()}[model_choice]
-    model.fit(X_train_vec, y_train)
-    y_pred = model.predict(X_test_vec)
-    accuracy = accuracy_score(y_test, y_pred)
-    st.success(f"🏆 Model Accuracy: {accuracy * 100:.2f}%")
-    st.text("📊 Classification Report:")
-    st.text(classification_report(y_test, y_pred))
-# Topic Modeling
-st.header('📚 4. Topic Modeling')
-st.subheader('📖 Definition:')
-st.write("""
-Topic modeling is a technique used to identify the underlying topics in a collection of text data.
-Latent Dirichlet Allocation (LDA) and Non-negative Matrix Factorization (NMF) are two common techniques for this task.
-""")
-topic_model_choice = st.selectbox('🔍 Choose topic modeling technique:', ('LDA', 'NMF'))
-if st.button('📌 Run Topic Modeling'):
-    vectorizer = TfidfVectorizer(max_df=0.95, min_df=2)
-    X = vectorizer.fit_transform(newsgroups.data)
-    model = LatentDirichletAllocation(n_components=5, random_state=42) if topic_model_choice == 'LDA' else NMF(n_components=5, random_state=42)
-    model.fit(X)
-    feature_names = vectorizer.get_feature_names_out()
-    for topic_idx, topic in enumerate(model.components_):
-        st.write(f"📝 **Topic {topic_idx + 1}:**")
-        top_words_idx = topic.argsort()[:-10 - 1:-1]
-        top_words = [feature_names[i] for i in top_words_idx]
-        st.success(", ".join(top_words))
-    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(" ".join(top_words))
-    st.image(wordcloud.to_array(), caption=f"🌥 Word Cloud for Topic {topic_idx + 1}")