"""Streamlit page that loads, cleans, summarizes and charts the AnalyzeYT dataset."""

import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from dataset_import import load_data
from data_cleaning import clean_data


def _show_correlation(data):
    """Render the correlation table and heatmap for the numeric columns of *data*."""
    st.write("Correlation Matrix of Cleaned Data:")

    # Correlation is only defined for numeric data; recent pandas versions
    # raise instead of silently dropping non-numeric columns.
    numeric = data.select_dtypes(include="number")
    if numeric.shape[1] < 2:
        st.info("At least two numeric columns are required to compute correlations.")
        return

    corr = numeric.corr()
    st.write(corr)

    # Draw on an explicit figure: calling st.pyplot() without one relies on
    # matplotlib's global state, which Streamlit deprecates.
    fig, ax = plt.subplots()
    sns.heatmap(corr, annot=True, cmap="coolwarm", ax=ax)
    st.pyplot(fig)
    plt.close(fig)  # avoid accumulating open figures across script reruns


def _show_xy_chart(data, draw_chart, chart_name):
    """Ask for X/Y columns and a title, then plot Y against X.

    *draw_chart* is the Streamlit chart function to use (``st.line_chart`` or
    ``st.bar_chart``); *chart_name* is both the default title and the caption
    prefix.
    """
    x_axis = st.selectbox("Select X-axis Column", data.columns)
    y_axis = st.selectbox("Select Y-axis Column", data.columns)
    title = st.text_input("Enter Chart Title", chart_name)

    if x_axis == y_axis:
        # Plotting a column against itself carries no information.
        st.warning("Select two different columns for the X and Y axes.")
        return

    # Pass x/y explicitly so the chosen X column is used as the axis instead
    # of being drawn as a second series against the row index.
    draw_chart(data, x=x_axis, y=y_axis)
    st.write(f"{chart_name}: {title}")


def _show_histogram(data):
    """Ask for a numeric column, bin count and title, then draw a histogram."""
    numeric_columns = data.select_dtypes(include="number").columns
    if numeric_columns.empty:
        st.info("No numeric columns are available for a histogram.")
        return

    selected_column = st.selectbox("Select Column for Histogram", numeric_columns)
    bins = st.slider("Number of Bins", min_value=10, max_value=100, value=30)
    title = st.text_input("Enter Chart Title", "Histogram")

    fig, ax = plt.subplots()
    # Missing values would make the automatic bin range non-finite.
    ax.hist(data[selected_column].dropna(), bins=bins)
    ax.set_title(title)
    ax.set_xlabel(selected_column)
    ax.set_ylabel("Frequency")
    st.pyplot(fig)
    plt.close(fig)  # avoid accumulating open figures across script reruns


def main():
    """Build the page: previews, description, correlation and a chosen chart."""
    st.title("AnalyzeYT Dataset Analysis")

    # Load dataset
    data = load_data()
    if data is None:
        # Nothing to show when the loader returns no dataset.
        return

    st.write("Loaded Data Preview:")
    st.write(data.head())

    # Clean data
    data = clean_data(data)
    st.write("Cleaned Data Preview:")
    st.write(data.head())

    # Show data description of cleaned data
    st.write("Cleaned Data Description:")
    st.write(data.describe())

    # Add correlation matrix for cleaned data
    _show_correlation(data)

    # Data visualization options for cleaned data
    st.write("Data Visualization on Cleaned Data:")
    chart_type = st.selectbox(
        "Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram']
    )

    if chart_type == 'Line Chart':
        _show_xy_chart(data, st.line_chart, "Line Chart")
    elif chart_type == 'Bar Chart':
        _show_xy_chart(data, st.bar_chart, "Bar Chart")
    elif chart_type == 'Histogram':
        _show_histogram(data)


# Called unconditionally so the page renders whenever the script is executed,
# exactly as the original top-level statements did.
main()