import pandas as pd
import streamlit as st
from sklearn.preprocessing import MinMaxScaler


def run():
    """Render the "Feature Engineering" lesson page with Streamlit.

    The page is emitted top to bottom: static text sections (overview, key
    concepts, introduction, objectives, key activities, detailed steps), a
    multiple-choice quiz, a code-based quiz, and a list of learning resources.

    Returns:
        None. All output is produced through ``st`` calls.
    """
    # The multi-line markdown bodies below are indented along with the code;
    # this relies on Streamlit stripping the common leading whitespace from
    # markdown text before rendering it.
    st.title("Feature Engineering")

    # --- Overview -----------------------------------------------------------
    st.write("## Overview")
    st.write("""
    Feature Engineering is the process of using domain knowledge to create
    features (input variables) that make machine learning algorithms work
    better. It involves transforming raw data into meaningful features that
    improve the performance of machine learning models.
    """)

    # --- Key concepts -------------------------------------------------------
    st.write("## Key Concepts & Explanations")
    st.markdown("""
    - **Feature Creation**: Generating new features from existing data.
    - **Feature Transformation**: Modifying features to make them more suitable for modeling.
    - **Feature Selection**: Identifying the most relevant features for the model.
    - **Feature Scaling**: Normalizing or standardizing features to ensure they are on a similar scale.
    """)

    # --- Introduction -------------------------------------------------------
    st.write("## Introduction")
    st.write("""
    Feature Engineering is a crucial step in the data science process. It can
    significantly impact the performance of machine learning models by
    providing them with the right input variables. Effective feature
    engineering requires a deep understanding of the data and the problem
    domain.
    """)

    # --- Objectives ---------------------------------------------------------
    st.header("Objectives")
    st.write("""
    - **Create New Features**: Generate new features that capture important information from the data.
    - **Transform Existing Features**: Modify existing features to make them more suitable for modeling.
    - **Select Relevant Features**: Identify and select the most relevant features for the model.
    - **Scale Features**: Normalize or standardize features to ensure they are on a similar scale.
    """)

    # --- Key activities -----------------------------------------------------
    st.header("Key Activities")
    st.write("""
    - **Feature Creation**: Generate new features from existing data using domain knowledge.
    - **Feature Transformation**: Apply mathematical transformations to features to improve their suitability for modeling.
    - **Feature Selection**: Use statistical techniques to identify the most relevant features for the model.
    - **Feature Scaling**: Normalize or standardize features to ensure they are on a similar scale.
    """)

    # --- Detailed steps -----------------------------------------------------
    # Sub-bullets are indented under their numbered item so that they render
    # as nested lists rather than as one flat list.
    st.write("## Detailed Steps")
    st.write("""
    1. **Feature Creation**:
        - Generate new features from existing data using domain knowledge.
        - Combine multiple features to create new ones (e.g., ratios, differences).
    2. **Feature Transformation**:
        - Apply mathematical transformations (e.g., log, square root) to features.
        - Encode categorical variables using techniques like one-hot encoding or label encoding.
    3. **Feature Selection**:
        - Use statistical techniques (e.g., correlation, mutual information) to identify relevant features.
        - Apply dimensionality reduction techniques (e.g., PCA) to reduce the number of features.
    4. **Feature Scaling**:
        - Normalize features to a range (e.g., 0 to 1) using MinMaxScaler.
        - Standardize features to have a mean of 0 and a standard deviation of 1 using StandardScaler.
    """)

    # --- Conceptual quiz ----------------------------------------------------
    st.write("## Quiz: Conceptual Questions")
    # NOTE(review): st.radio preselects the first option unless told otherwise,
    # and the first option is the correct answer, so the success message is
    # shown before the user has interacted. Consider `index=None` (and skipping
    # the feedback while the value is None) once the minimum supported
    # Streamlit version is confirmed to accept it.
    q1 = st.radio(
        "What is the main purpose of feature engineering?",
        [
            "Improve model accuracy",
            "Reduce dataset size",
            "Make data harder to interpret",
        ],
    )
    if q1 == "Improve model accuracy":
        st.success("✅ Correct!")
    else:
        st.error("❌ Incorrect. The main purpose is to improve model accuracy.")

    # --- Code-based quiz ----------------------------------------------------
    st.write("## Code-Based Quiz")
    # Starter snippet shown in the text area. It imports pandas itself because
    # the function body uses `pd.DataFrame`; without that import the snippet
    # fails with NameError when copied out and run on its own.
    starter_code = (
        "import pandas as pd\n"
        "from sklearn.preprocessing import MinMaxScaler\n"
        "\n"
        "def normalize_data(df):\n"
        "    scaler = MinMaxScaler()\n"
        "    return pd.DataFrame(scaler.fit_transform(df), columns=df.columns)"
    )
    code_input = st.text_area(
        "Write a function to normalize a dataset using MinMaxScaler",
        value=starter_code,
    )
    # The answer is never executed; it is accepted purely on whether the text
    # mentions MinMaxScaler.
    if "MinMaxScaler" in code_input:
        st.success("✅ Correct!")
    else:
        st.error("❌ Try again.")

    # --- Learning resources -------------------------------------------------
    st.write("## Learning Resources")
    st.markdown("""
    - 📘 [Feature Engineering for Machine Learning](https://towardsdatascience.com/feature-engineering-for-machine-learning-3a5e293a5114)
    - 🎓 [Scikit-learn Feature Engineering Guide](https://scikit-learn.org/stable/modules/feature_extraction.html)
    - 🔬 [Feature Engineering Techniques](https://www.analyticsvidhya.com/blog/2021/10/a-comprehensive-guide-on-feature-engineering/)
    """)