# streamlit_health_dashboard.py
"""Streamlit dashboard for annual health-checkup results.

The script gates access behind a password, loads an (optionally encrypted)
Excel workbook, and renders three pages selected from the sidebar:
"Main Overview", "Detailed Analysis" and "Advanced Analytics".

The password used for both the login form and workbook decryption is read
from the ``password`` environment variable.
"""
import hmac
import os
from io import BytesIO

import matplotlib.pyplot as plt
import msoffcrypto
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go  # noqa: F401  (kept: file-level import)
import seaborn as sns
import streamlit as st

# Labels for the lipid risk classes, in priority order (first match wins).
RISK_LABELS = [
    "Very High Risk",
    "High Chol & High TGs",
    "High Chol Only",
    "High TGs Only",
    "Normal",
]


def _password_matches(candidate, expected):
    """Return True when *candidate* equals the configured password.

    Access is denied when no password is configured. The comparison is done
    on bytes with ``hmac.compare_digest`` so it runs in constant time and
    accepts non-ASCII input.
    """
    if not expected:
        return False
    return hmac.compare_digest(candidate.encode("utf-8"), expected.encode("utf-8"))


def _read_workbook(source, password):
    """Load an Excel workbook from a binary file-like object.

    The workbook is first treated as password-protected and decrypted with
    *password*; if that fails for any reason it is read as a plain workbook.
    Column names are converted to stripped strings.
    """
    try:
        office_file = msoffcrypto.OfficeFile(source)
        office_file.load_key(password=password)
        decrypted = BytesIO()
        office_file.decrypt(decrypted)
        frame = pd.read_excel(decrypted)
    except Exception:
        # Deliberate best-effort fallback: an unencrypted workbook makes the
        # decryption path fail, so retry as a plain file. `Exception` (not a
        # bare `except`) keeps KeyboardInterrupt/SystemExit propagating.
        source.seek(0)
        frame = pd.read_excel(source)
    frame.columns = [str(col).strip() for col in frame.columns]
    return frame


def _pct_above(series, threshold):
    """Return the percentage of non-missing values in *series* above *threshold*.

    Returns 0.0 when there are no measured values, instead of NaN.
    """
    measured = series.dropna()
    if measured.empty:
        return 0.0
    return float((measured > threshold).mean() * 100)


def _classify_lipid_risk(frame):
    """Return an array of risk labels, one per row of *frame*.

    Rows are matched against the classes in ``RISK_LABELS`` order; comparisons
    against missing values are False, so such rows fall through to later
    classes.
    """
    chol = frame['Cholesterol']
    tgs = frame['Triglycerides']
    high_chol = (chol > 200).to_numpy()
    high_tgs = (tgs > 150).to_numpy()
    conditions = [
        ((chol > 300) | (tgs > 400)).to_numpy(),
        high_chol & high_tgs,
        high_chol,
        high_tgs,
    ]
    return np.select(conditions, RISK_LABELS[:-1], default=RISK_LABELS[-1])


def _show_and_close(fig):
    """Render a matplotlib figure and close it so figures do not pile up across reruns."""
    st.pyplot(fig)
    plt.close(fig)


# Initialize session state variables
if "authenticated" not in st.session_state:
    st.session_state.authenticated = False


# Function to log out
def logout():
    """Clear the login flag; used as the Logout button's ``on_click`` callback."""
    # No st.rerun() here: Streamlit reruns the script after a callback, and
    # calling st.rerun() inside a callback is a no-op.
    st.session_state.authenticated = False


# If not authenticated, show login form and halt the script
if not st.session_state.authenticated:
    with st.form("login_form"):
        password_input = st.text_input("Enter password to access the dashboard:", type="password")
        submitted = st.form_submit_button("Login")
    if submitted:
        if _password_matches(password_input, os.environ.get("password")):
            st.session_state.authenticated = True
            st.success("Login successful!")
            st.rerun()
        else:
            st.error("Incorrect password. Please try again.")
    st.stop()

# Show logout button at the top after login
st.sidebar.button("🚪 Logout", on_click=logout)

# Load dataset: prefer the local workbook, otherwise ask for an upload.
# NOTE(review): the upload branch previously embedded a literal password in
# the source; both branches now read it from the environment. Rotate the
# old credential and make sure the environment variable holds the right one.
local_excel_path = "all_BMI.xlsx"
workbook_password = os.environ.get("password")
if os.path.exists(local_excel_path):
    with open(local_excel_path, "rb") as workbook:
        df = _read_workbook(workbook, workbook_password)
else:
    uploaded_file = st.file_uploader("Upload your Excel file", type=["xlsx"])
    if uploaded_file is None:
        # Without this guard `df` would be undefined below.
        st.info("Upload an Excel file to continue.")
        st.stop()
    df = _read_workbook(uploaded_file, workbook_password)

# Data Cleaning
numeric_cols = ['Cholesterol', 'Triglycerides', 'HbA1c', 'UricAcid', 'Creatinine', 'SGOT(AST)', 'SGPT(ALT)']
missing_cols = [col for col in numeric_cols + ['BMI', 'Wg', 'Hg'] if col not in df.columns]
if missing_cols:
    st.error(f"The workbook is missing required columns: {', '.join(missing_cols)}")
    st.stop()
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Sidebar: Year Filter
if 'year' in df.columns:
    available_years = sorted(df['year'].dropna().unique())
    selected_year = st.sidebar.selectbox("Select Year", available_years)
    df = df[df['year'] == selected_year]
    dashboard_year = selected_year
else:
    dashboard_year = 2024  # default if no year column exists

# Sidebar: Page selection
page = st.sidebar.selectbox("Select Page", ["Main Overview", "Detailed Analysis", "Advanced Analytics"])

# Sidebar: Thresholds
st.sidebar.header("Threshold Settings 🎯")
uric_acid_thresh = st.sidebar.slider("Uric Acid Threshold", 5.0, 10.0, 7.0)
creatinine_thresh = st.sidebar.slider("Creatinine Threshold", 0.8, 2.0, 1.2)
sgot_thresh = st.sidebar.slider("SGOT(AST) Threshold", 30, 100, 50)
sgpt_thresh = st.sidebar.slider("SGPT(ALT) Threshold", 30, 100, 50)

# Sidebar: Filters
st.sidebar.header("Apply Filters 🔎")
show_high_cholesterol = st.sidebar.checkbox("Show High Cholesterol Only (>200)")
show_high_tgs = st.sidebar.checkbox("Show High TGs Only (>150)")
show_high_hba1c = st.sidebar.checkbox("Show High HbA1c Only (>6.4)")
show_pre_diabetes = st.sidebar.checkbox("Show Pre-Diabetes (5.7-6.4)")

# Sidebar: Theme
# NOTE(review): both theme strings are blank, so the choice has no visible
# effect; the CSS payload appears to be missing — restore it if intended.
theme = st.sidebar.radio("Choose Theme 🌗", ["Light", "Dark"])
if theme == "Dark":
    st.markdown(""" """, unsafe_allow_html=True)
else:
    st.markdown(""" """, unsafe_allow_html=True)

# Apply Filters (each ticked checkbox narrows the result further)
filtered_df = df.copy()
if show_high_cholesterol:
    filtered_df = filtered_df[filtered_df['Cholesterol'] > 200]
if show_high_tgs:
    filtered_df = filtered_df[filtered_df['Triglycerides'] > 150]
if show_high_hba1c:
    filtered_df = filtered_df[filtered_df['HbA1c'] > 6.4]
if show_pre_diabetes:
    filtered_df = filtered_df[(filtered_df['HbA1c'] >= 5.7) & (filtered_df['HbA1c'] <= 6.4)]

# Drop NA values for BMI, Wg, Hg
# NOTE(review): this frame is built from `df`, so the BMI sections ignore
# the sidebar checkbox filters — confirm that is intended.
filtered_df2 = df.dropna(subset=['BMI', 'Wg', 'Hg']).copy()

# Ensure BMI is numeric
filtered_df2['BMI'] = pd.to_numeric(filtered_df2['BMI'], errors='coerce')


# Add BMI Category Column
def classify_bmi(bmi):
    """Map a BMI value to "Underweight", "Normal", "Overweight" or "Obese".

    Missing values map to "Unknown".
    """
    if pd.isna(bmi):
        return "Unknown"
    if bmi < 18.5:
        return "Underweight"
    if bmi < 25:
        return "Normal"
    if bmi < 30:
        return "Overweight"
    return "Obese"


filtered_df2['BMI Category'] = filtered_df2['BMI'].apply(classify_bmi)

### ----------------- Main Overview Page -------------------
if page == "Main Overview":
    st.title(f"Annual Health Checkup Dashboard (Data Year: {dashboard_year})")

    st.header("Key Performance Indicators 📈")
    # KPIs are percentages of measured (non-missing) values
    high_uric_acid = _pct_above(filtered_df['UricAcid'], uric_acid_thresh)
    high_creatinine = _pct_above(filtered_df['Creatinine'], creatinine_thresh)
    elevated_sgot = _pct_above(filtered_df['SGOT(AST)'], sgot_thresh)
    elevated_sgpt = _pct_above(filtered_df['SGPT(ALT)'], sgpt_thresh)
    high_cholesterol = _pct_above(filtered_df['Cholesterol'], 200)
    high_tgs = _pct_above(filtered_df['Triglycerides'], 150)

    col1, col2, col3 = st.columns(3)
    col1.metric("High Uric Acid %", f"{high_uric_acid:.1f}%")
    col2.metric("High Creatinine %", f"{high_creatinine:.1f}%")
    col3.metric("Elevated SGOT/SGPT %", f"{(elevated_sgot + elevated_sgpt)/2:.1f}%")
    col4, col5 = st.columns(2)
    col4.metric("High Cholesterol %", f"{high_cholesterol:.1f}%")
    col5.metric("High Triglycerides %", f"{high_tgs:.1f}%")

    st.header("Distribution Charts 📊")
    for column in ('Cholesterol', 'Triglycerides', 'HbA1c'):
        fig = px.histogram(filtered_df, x=column, nbins=30, title=f'{column} Distribution')
        st.plotly_chart(fig)

    st.header("Pie Charts for Elevated Metrics 🥧")
    # (title, column, predicate). The HbA1c title states 6.4 to match the
    # threshold actually applied here and in the sidebar filter.
    pie_specs = [
        ('Cholesterol >200', 'Cholesterol', lambda s: s > 200),
        ('Triglycerides >150', 'Triglycerides', lambda s: s > 150),
        ('HbA1c >6.4', 'HbA1c', lambda s: s > 6.4),
        ('Pre-Diabetic', 'HbA1c', lambda s: s.between(5.7, 6.4)),
        ('Uric Acid', 'UricAcid', lambda s: s > uric_acid_thresh),
        ('Creatinine', 'Creatinine', lambda s: s > creatinine_thresh),
        ('SGOT(AST)', 'SGOT(AST)', lambda s: s > sgot_thresh),
        ('SGPT(ALT)', 'SGPT(ALT)', lambda s: s > sgpt_thresh),
    ]
    for metric_name, column, is_flagged in pie_specs:
        # Only measured values take part, so a missing result is not
        # reported as "Normal" (consistent with the KPIs above).
        measured = filtered_df[column].dropna()
        flagged = int(is_flagged(measured).sum())
        fig = px.pie(
            names=["Above Threshold", "Normal"],
            values=[flagged, len(measured) - flagged],
            title=metric_name,
        )
        st.plotly_chart(fig)

    st.subheader("BMI, Weight, and Height Distribution (Histograms)")
    st.plotly_chart(px.histogram(filtered_df2, x='BMI', nbins=30, title='BMI Distribution'))
    st.plotly_chart(px.histogram(filtered_df2, x='Wg', nbins=30, title='Weight (Wg) Distribution'))
    st.plotly_chart(px.histogram(filtered_df2, x='Hg', nbins=30, title='Height (Hg) Distribution'))

    # KDE Density Plots
    st.subheader("BMI, Weight, and Height Distribution (KDE)")
    fig, ax = plt.subplots(1, 3, figsize=(18, 5))
    kde_panels = [
        ('BMI', 'BMI Density'),
        ('Wg', 'Weight Density (Wg)'),
        ('Hg', 'Height Density (Hg)'),
    ]
    for axis, (column, title) in zip(ax, kde_panels):
        sns.kdeplot(filtered_df2[column].dropna(), fill=True, ax=axis)
        axis.set_title(title)
    _show_and_close(fig)

    # BMI Category Charts
    st.subheader("BMI Category Distribution 🧍‍♂️")
    bmi_counts = filtered_df2['BMI Category'].value_counts().reset_index()
    bmi_counts.columns = ['BMI Category', 'Count']
    st.plotly_chart(px.pie(bmi_counts, names='BMI Category', values='Count',
                           title="Proportion of Individuals by BMI Category"))
    st.plotly_chart(px.bar(bmi_counts, x='BMI Category', y='Count',
                           title="Individuals per BMI Category", color='BMI Category'))

    # Color-coded Alerts
    st.subheader("BMI Category Alerts 🚦")
    bmi_categories = {
        "Underweight": {"color": "#007bff", "emoji": "🧍‍♂️", "msg": "Underweight individuals detected"},
        "Normal": {"color": "#28a745", "emoji": "✅", "msg": "Normal BMI range"},
        "Overweight": {"color": "#fd7e14", "emoji": "⚠️", "msg": "Overweight individuals present"},
        "Obese": {"color": "#dc3545", "emoji": "🚨", "msg": "Obese individuals found - high risk"},
    }
    for category, info in bmi_categories.items():
        count = (filtered_df2['BMI Category'] == category).sum()
        if count > 0:
            # NOTE(review): info['color'] is never used and the message has
            # no HTML although unsafe_allow_html=True is passed — the colored
            # wrapper markup was probably lost; restore it from the original.
            st.markdown(
                f"\n\n{info['emoji']} {category}: {info['msg']} — {count} individuals\n\n",
                unsafe_allow_html=True,
            )

### ----------------- Detailed Analysis Page -------------------
elif page == "Detailed Analysis":
    st.title(f"Detailed Analysis (Data Year: {dashboard_year})")

    st.header("Correlation Heatmap 🔥")
    corr = filtered_df[numeric_cols].corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
    _show_and_close(fig)

    st.header("Risk Summary Table 📋")
    # Uses the same mutually exclusive classes as the scatter plot below,
    # counted over rows where both lipid values were measured, so each such
    # row is counted exactly once.
    lipids_measured = filtered_df.dropna(subset=['Cholesterol', 'Triglycerides'])
    risk_counts = (
        pd.Series(_classify_lipid_risk(lipids_measured), dtype=object)
        .value_counts()
        .reindex(RISK_LABELS, fill_value=0)
    )
    risk_summary = pd.DataFrame({
        'Category': RISK_LABELS,
        'Count': risk_counts.to_numpy(),
    })
    st.dataframe(risk_summary)

    st.header("Scatter Plot: Cholesterol vs TGs")
    # Vectorized classification; also well-defined for an empty frame,
    # where a row-wise DataFrame.apply would not return a Series.
    scatter_df = filtered_df.assign(**{"Risk Category": _classify_lipid_risk(filtered_df)})
    fig = px.scatter(
        scatter_df,
        x="Cholesterol",
        y="Triglycerides",
        color="Risk Category",
        trendline="ols",
        hover_data=["HbA1c", "UricAcid"],
    )
    st.plotly_chart(fig)

    st.subheader("Other Scatter Plots")
    fig1 = px.scatter(filtered_df, x="HbA1c", y="Cholesterol", trendline="ols", title="HbA1c vs Cholesterol")
    st.plotly_chart(fig1)
    fig2 = px.scatter(filtered_df, x="UricAcid", y="Creatinine", trendline="ols", title="Uric Acid vs Creatinine")
    st.plotly_chart(fig2)
    fig3 = px.scatter(filtered_df, x="SGPT(ALT)", y="SGOT(AST)", trendline="ols", title="SGPT(ALT) vs SGOT(AST)")
    st.plotly_chart(fig3)

    # Boxplots
    st.subheader("Boxplots for BMI, Weight, and Height")
    fig, ax = plt.subplots(1, 3, figsize=(18, 5))
    box_panels = [('BMI', "BMI"), ('Wg', "Weight (Wg)"), ('Hg', "Height (Hg)")]
    for axis, (column, title) in zip(ax, box_panels):
        sns.boxplot(y=filtered_df2[column], ax=axis)
        axis.set_title(title)
    _show_and_close(fig)

    # Scatter Plot
    st.subheader("Scatter Plot: BMI vs Cholesterol")
    st.plotly_chart(px.scatter(filtered_df2, x="BMI", y="Cholesterol", trendline="ols",
                               title="BMI vs Cholesterol"))

    # Risk Summary by BMI Category
    st.subheader("BMI Categories and Risk Levels 🧪")
    risk_columns = ['BMI Category', 'Count', 'High Cholesterol %', 'High Triglycerides %', 'High HbA1c %']
    # Explicit per-group rows instead of groupby().apply(): Count stays an
    # integer and percentages are taken over measured values only.
    risk_rows = [
        {
            'BMI Category': category,
            'Count': len(group),
            'High Cholesterol %': round(_pct_above(group['Cholesterol'], 200), 1),
            'High Triglycerides %': round(_pct_above(group['Triglycerides'], 150), 1),
            'High HbA1c %': round(_pct_above(group['HbA1c'], 6.4), 1),
        }
        for category, group in filtered_df2.groupby('BMI Category')
    ]
    risk_by_bmi = pd.DataFrame(risk_rows, columns=risk_columns)
    st.dataframe(risk_by_bmi)

### ----------------- Advanced Analytics Page -------------------
elif page == "Advanced Analytics":
    st.title("🚀 Advanced Analytics")
    st.info("Coming Soon: Predictive modeling for health risks!")