import html
import json

import pandas as pd
import streamlit as st


def _missing_value_summary(dataset: pd.DataFrame) -> pd.DataFrame:
    """Return a per-column table of missing-value counts and percentages."""
    row_count = len(dataset)
    # Compute the NA counts once; they feed both the count and the percentage.
    missing_counts = dataset.isna().sum().values
    if row_count:
        missing_pct = missing_counts / row_count * 100
    else:
        # An empty frame has nothing missing; avoid a 0/0 division (NaN).
        missing_pct = [0.0] * len(missing_counts)
    return pd.DataFrame({
        'Column': dataset.columns,
        'Missing': missing_counts,
        'Percentage': missing_pct,
    })


def render_dataset_preview(dataset: "pd.DataFrame | None", dataset_type: str) -> None:
    """
    Renders a preview of the dataset with pagination options.

    The preview consists of summary metrics (rows, columns, type), a
    head/tail/sample table, a collapsible schema section (column dtypes and
    missing values) and a collapsible raw dump of the first five records.

    Args:
        dataset: The dataset to preview (pandas DataFrame). When ``None`` a
            warning is shown and nothing else is rendered.
        dataset_type: The type of dataset (csv, json, etc.). Compared
            case-insensitively; anything other than ``csv`` is dumped as JSON
            in the raw-data section.
    """
    if dataset is None:
        st.warning("No dataset to preview.")
        return

    # Fall back to a placeholder instead of raising when the name was never
    # stored in the session, and escape it because the markdown below is
    # rendered with unsafe_allow_html=True.
    dataset_name = html.escape(str(st.session_state.get("dataset_name", "Untitled")))
    type_name = str(dataset_type or "unknown").lower()

    # NOTE(review): the markdown calls in this function pass
    # unsafe_allow_html=True, yet the strings contain no HTML -- wrapper
    # markup may have been lost from this file; confirm against the intended
    # layout.
    st.markdown(
        f"\n\nDataset Preview: {dataset_name}\n\n",
        unsafe_allow_html=True,
    )

    # Show basic info
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Rows", f"{dataset.shape[0]:,}")
    with col2:
        st.metric("Columns", f"{dataset.shape[1]:,}")
    with col3:
        st.metric("Type", type_name.upper())

    # Preview options
    col1, col2 = st.columns([1, 3])
    with col1:
        num_rows = st.number_input(
            "Rows to display", min_value=5, max_value=100, value=10, step=5
        )
    with col2:
        preview_mode = st.radio(
            "Preview mode", ["Head", "Tail", "Sample"], horizontal=True
        )

    # Display dataset preview
    st.markdown("\n", unsafe_allow_html=True)
    if preview_mode == "Head":
        preview = dataset.head(num_rows)
    elif preview_mode == "Tail":
        preview = dataset.tail(num_rows)
    else:  # Sample
        # sample() raises if asked for more rows than exist, so cap the size.
        preview = dataset.sample(min(num_rows, len(dataset)))
    st.dataframe(preview, use_container_width=True)
    st.markdown("\n", unsafe_allow_html=True)

    # Show dataset schema
    with st.expander("Dataset Schema"):
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Column Types**")
            type_df = pd.DataFrame({
                'Column': dataset.dtypes.index,
                'Type': dataset.dtypes.values.astype(str),
            })
            st.dataframe(type_df, use_container_width=True)
        with col2:
            st.markdown("**Missing Values**")
            missing_df = _missing_value_summary(dataset)
            st.dataframe(
                missing_df.style.format({'Percentage': '{:.2f}%'}),
                use_container_width=True,
            )

    # Raw data
    with st.expander("Raw Data (First 5 records)"):
        first_records = dataset.head(5)
        if type_name == 'csv':
            st.code(first_records.to_csv(index=False), language="text")
        else:  # json or jsonl
            st.code(
                first_records.to_json(orient='records', indent=2),
                language="json",
            )