import pandas as pd
import streamlit as st


def _fill_missing(df: pd.DataFrame, method: str) -> pd.DataFrame:
    """Return *df* with missing values handled according to *method*.

    Args:
        df: The DataFrame loaded from the uploaded CSV.
        method: One of the radio labels "Fill with Mean",
            "Fill with Median" or "Drop Rows".

    Returns:
        A new DataFrame; the input is not modified. An unrecognised
        *method* returns *df* unchanged.
    """
    if method == "Fill with Mean":
        # numeric_only=True: uploaded CSVs commonly contain text columns, and
        # pandas >= 2.0 raises TypeError when asked for their mean.
        return df.fillna(df.mean(numeric_only=True))
    if method == "Fill with Median":
        return df.fillna(df.median(numeric_only=True))
    if method == "Drop Rows":
        return df.dropna()
    return df


def run() -> None:
    """Render the "Data Preparation" page.

    The page shows static introductory text, lets the user upload a CSV and
    choose how missing values are handled, and then presents two small
    quizzes followed by a learning-resource link.
    """
    st.title("Data Preparation")

    st.header("Introduction")
    st.write("""
    Data Preparation involves cleaning and transforming raw data into a format suitable for analysis.
    """)

    st.header("Objectives")
    st.write("""
    - Clean the data.
    - Transform the data.
    - Integrate data from multiple sources.
    """)

    st.header("Key Activities")
    st.write("""
    - Data cleaning.
    - Data transformation.
    - Data integration.
    """)

    st.title("3. Data Preparation")
    st.write("## Overview")
    st.write("Cleaning and transforming data for better model performance.")

    st.write("## Key Concepts & Explanations")
    # ":red[...]" is Streamlit's colored-text directive; without the leading
    # colon the text "red[...]" is rendered literally.
    st.markdown("""
    - :red[**Handling Missing Values**]: Fill with mean/median or drop rows.
    - **Feature Engineering**: Creating new features for better modeling.
    - **Scaling**: Normalization and standardization for consistency.
    """)

    file = st.file_uploader("Upload a dataset", type=["csv"])
    if file:
        df = pd.read_csv(file)
        option = st.radio(
            "Choose a method to handle missing values",
            ["Fill with Mean", "Fill with Median", "Drop Rows"],
        )
        df = _fill_missing(df, option)
        # Show the result so the chosen strategy has a visible effect;
        # previously the cleaned frame was computed and then discarded.
        st.write(df.head())

    st.write("## Quiz: Conceptual Questions")
    q1 = st.radio(
        "What is feature engineering?",
        ["Data visualization", "Creating new features", "Data storage"],
    )
    if q1 == "Creating new features":
        st.success("✅ Correct!")
    else:
        st.error("❌ Incorrect.")

    st.write("## Code-Based Quiz")
    code_input = st.text_area(
        "Write a function to normalize a column",
        value="def normalize(col):\n return (col - col.min()) / (col.max() - col.min())",
    )
    # The answer is accepted when it contains the min-max range expression.
    if "col.max() - col.min()" in code_input:
        st.success("✅ Correct!")
    else:
        st.error("❌ Try again.")

    st.write("## Learning Resources")
    st.markdown("""
    - 📝 [Data Cleaning with Pandas](https://realpython.com/pandas-data-cleaning/)
    """)