Spaces:
Sleeping
Sleeping
File size: 3,500 Bytes
a1e3335 7038265 11a46a2 73a688e 7038265 b5b1133 aaafc99 b5b1133 7038265 a1e3335 73a688e 3a2859f 73a688e a1e3335 73a688e a1e3335 73a688e a1e3335 73a688e a1e3335 73a688e a1e3335 73a688e a1e3335 73a688e a1e3335 73a688e a1e3335 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from analyze import analyze_csv
from plan import generate_cleaning_plan
from execute import execute_plan
from insight import generate_insights
from visual_insight import generate_visual_plan
from report import ReportBuilder
from transformers import AutoTokenizer
# Scratch locations under /tmp: the uploaded CSV, the cleaned CSV, the
# generated PDF report, and the directory chart images are written to.
charts_dir = "/tmp/charts"
input_path = "/tmp/input.csv"
output_path = "/tmp/output.csv"
report_path = "/tmp/final_report.pdf"
os.makedirs(charts_dir, exist_ok=True)

# Load the tokenizer with the Hugging Face token taken from the environment.
# Per the original note, this load serves as an authentication/access check.
cache_dir = "/tmp/hf_cache"
hf_token = os.getenv("HUGGINGFACE_TOKEN")
tokenizer = AutoTokenizer.from_pretrained(
    "google/gemma-3n-E4B",
    token=hf_token,
    cache_dir=cache_dir,
)

# Page chrome and the CSV upload widget.
st.set_page_config(page_title="Smart Data Cleaning Agent", layout="wide")
st.title("π§ Smart Data Cleaning Agent")
uploaded_file = st.file_uploader("π Upload a CSV file", type=["csv"])
# Main pipeline: runs once a CSV has been uploaded. Steps, in order:
# save upload -> analyze -> build cleaning plan -> apply plan -> insights ->
# recommended plots -> optional PDF report.
# NOTE(review): the emoji prefixes in the UI strings below appear mis-encoded
# (UTF-8 bytes decoded with a single-byte codec); confirm the file encoding
# and restore the intended characters at the source.
if uploaded_file:
    # Save file to /tmp/ for processing (analyze_csv takes a path).
    with open(input_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    df = pd.read_csv(input_path)
    st.subheader("π Original Data Preview")
    st.dataframe(df.head())

    with st.spinner("π Analyzing CSV..."):
        analysis = analyze_csv(input_path)

    with st.spinner("π§Ό Generating Cleaning Plan..."):
        cleaning_plan, cleaning_summary = generate_cleaning_plan(analysis)
    st.subheader("π§Ή Cleaning Plan")
    st.json(cleaning_plan)
    # This literal was split across two lines (a syntax error); rejoined here.
    st.markdown("### ✅ Cleaning Summary")
    st.markdown(cleaning_summary)

    with st.spinner("π§ͺ Applying cleaning..."):
        # Work on a copy so the original preview frame is never mutated.
        cleaned_df = execute_plan(df.copy(), cleaning_plan)
        cleaned_df.to_csv(output_path, index=False)
    st.subheader("π§Ό Cleaned Data Preview")
    st.dataframe(cleaned_df.head())
    st.download_button("β¬οΈ Download Cleaned CSV", cleaned_df.to_csv(index=False), file_name="cleaned.csv")

    with st.spinner("π§ Deriving insights..."):
        insights = generate_insights(analysis["columns"])
    st.subheader("π EDA Insights")
    st.text(insights)

    with st.spinner("π Generating recommended plots..."):
        visuals = generate_visual_plan(analysis["columns"])
        for vis in visuals:
            st.markdown(f"#### {vis['title']}")
            st.markdown(vis['description'])
            try:
                # Redirect relative "charts/" output paths into the /tmp chart dir.
                safe_code = vis["code"].replace("charts/", f"{charts_dir}/")
                # SECURITY: this executes code returned by generate_visual_plan
                # with full interpreter privileges (``os`` is exposed).
                # NOTE(review): if that code is model-generated or can be
                # influenced by the uploaded data, sandbox it or replace it
                # with a declarative plot spec.
                exec(safe_code, {"df": cleaned_df, "plt": plt, "sns": sns, "os": os})
                st.pyplot(plt.gcf())
            except Exception as e:
                # Best-effort: one failing chart must not abort the others.
                st.error(f"β Failed to render: {e}")
            finally:
                # Always reset the current figure, even after a failure, so a
                # half-drawn plot cannot bleed into the next chart.
                plt.clf()

    if st.button("π Generate PDF Report"):
        report = ReportBuilder(output_path=report_path)
        report.add_title("π Smart Data Cleaning Report")
        report.add_section("Cleaning Summary", cleaning_summary)
        report.add_section("EDA Insights", insights)
        for vis in visuals:
            if "savefig('" not in vis["code"]:
                continue
            # Extract the first argument of the last savefig('...') call.
            path = vis["code"].split("savefig('")[-1].split("')")[0]
            if not path.startswith("/"):
                # Mirror the "charts/" -> charts_dir redirect applied at render time.
                path = os.path.join(charts_dir, os.path.basename(path))
            if not os.path.exists(path):
                # The chart was never written (e.g. rendering failed above).
                continue
            report.add_plot(path, vis["description"])
        report.save()
        with open(report_path, "rb") as f:
            st.download_button("β¬οΈ Download PDF Report", f, file_name="smart_data_report.pdf")
|