Osnly committed on
Commit
73a688e
·
verified ·
1 Parent(s): b2c47ac

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +24 -19
src/streamlit_app.py CHANGED
@@ -13,13 +13,12 @@ from report import ReportBuilder
13
 
14
  from transformers import AutoTokenizer
15
 
16
- # Use a writable cache dir to avoid permission issues on Hugging Face Spaces
17
- HF_CACHE_DIR = "./hf_cache"
18
- os.environ["HF_HOME"] = HF_CACHE_DIR
19
- os.environ["TRANSFORMERS_CACHE"] = HF_CACHE_DIR
20
-
21
- # Ensure cache directory exists
22
- os.makedirs(HF_CACHE_DIR, exist_ok=True)
23
 
24
  # Authenticate and load tokenizer to check access
25
  hf_token = os.environ.get("HUGGINGFACE_TOKEN")
@@ -32,17 +31,19 @@ tokenizer = AutoTokenizer.from_pretrained(
32
  st.set_page_config(page_title="Smart Data Cleaning Agent", layout="wide")
33
  st.title("🧠 Smart Data Cleaning Agent")
34
 
35
- os.makedirs("charts", exist_ok=True)
36
-
37
  uploaded_file = st.file_uploader("📂 Upload a CSV file", type=["csv"])
38
 
39
  if uploaded_file:
40
- df = pd.read_csv(uploaded_file)
 
 
 
 
41
  st.subheader("🔍 Original Data Preview")
42
  st.dataframe(df.head())
43
 
44
  with st.spinner("📊 Analyzing CSV..."):
45
- analysis = analyze_csv(uploaded_file)
46
 
47
  with st.spinner("🧼 Generating Cleaning Plan..."):
48
  cleaning_plan, cleaning_summary = generate_cleaning_plan(analysis)
@@ -53,6 +54,7 @@ if uploaded_file:
53
 
54
  with st.spinner("🧪 Applying cleaning..."):
55
  cleaned_df = execute_plan(df.copy(), cleaning_plan)
 
56
  st.subheader("🧼 Cleaned Data Preview")
57
  st.dataframe(cleaned_df.head())
58
  st.download_button("⬇️ Download Cleaned CSV", cleaned_df.to_csv(index=False), file_name="cleaned.csv")
@@ -68,23 +70,26 @@ if uploaded_file:
68
  st.markdown(f"#### {vis['title']}")
69
  st.markdown(vis['description'])
70
  try:
71
- exec(vis["code"], {"df": cleaned_df, "plt": plt, "sns": sns, "os": os})
 
72
  st.pyplot(plt.gcf())
73
  plt.clf()
74
  except Exception as e:
75
  st.error(f"❌ Failed to render: {e}")
76
 
77
  if st.button("📝 Generate PDF Report"):
78
- report = ReportBuilder("report.pdf")
79
  report.add_title("📊 Smart Data Cleaning Report")
80
- report.add_text("Cleaning Summary", cleaning_summary)
81
- report.add_text("EDA Insights", insights)
82
 
83
  for vis in visuals:
84
- if "savefig('" in vis['code']:
85
- path = vis['code'].split("savefig('")[-1].split("')")[0]
86
- report.add_image(path, vis['description'])
 
 
87
 
88
  report.save()
89
- with open("report.pdf", "rb") as f:
90
  st.download_button("⬇️ Download PDF Report", f, file_name="smart_data_report.pdf")
 
13
 
14
  from transformers import AutoTokenizer
15
 
16
+ # Temp-safe paths
17
+ input_path = "/tmp/input.csv"
18
+ output_path = "/tmp/output.csv"
19
+ report_path = "/tmp/final_report.pdf"
20
+ charts_dir = "/tmp/charts"
21
+ os.makedirs(charts_dir, exist_ok=True)
 
22
 
23
  # Authenticate and load tokenizer to check access
24
  hf_token = os.environ.get("HUGGINGFACE_TOKEN")
 
31
  st.set_page_config(page_title="Smart Data Cleaning Agent", layout="wide")
32
  st.title("🧠 Smart Data Cleaning Agent")
33
 
 
 
34
  uploaded_file = st.file_uploader("📂 Upload a CSV file", type=["csv"])
35
 
36
  if uploaded_file:
37
+ # Save file to /tmp/ for processing
38
+ with open(input_path, "wb") as f:
39
+ f.write(uploaded_file.read())
40
+
41
+ df = pd.read_csv(input_path)
42
  st.subheader("🔍 Original Data Preview")
43
  st.dataframe(df.head())
44
 
45
  with st.spinner("📊 Analyzing CSV..."):
46
+ analysis = analyze_csv(input_path)
47
 
48
  with st.spinner("🧼 Generating Cleaning Plan..."):
49
  cleaning_plan, cleaning_summary = generate_cleaning_plan(analysis)
 
54
 
55
  with st.spinner("🧪 Applying cleaning..."):
56
  cleaned_df = execute_plan(df.copy(), cleaning_plan)
57
+ cleaned_df.to_csv(output_path, index=False)
58
  st.subheader("🧼 Cleaned Data Preview")
59
  st.dataframe(cleaned_df.head())
60
  st.download_button("⬇️ Download Cleaned CSV", cleaned_df.to_csv(index=False), file_name="cleaned.csv")
 
70
  st.markdown(f"#### {vis['title']}")
71
  st.markdown(vis['description'])
72
  try:
73
+ safe_code = vis["code"].replace("charts/", f"{charts_dir}/")
74
+ exec(safe_code, {"df": cleaned_df, "plt": plt, "sns": sns, "os": os})
75
  st.pyplot(plt.gcf())
76
  plt.clf()
77
  except Exception as e:
78
  st.error(f"❌ Failed to render: {e}")
79
 
80
  if st.button("📝 Generate PDF Report"):
81
+ report = ReportBuilder(output_path=report_path)
82
  report.add_title("📊 Smart Data Cleaning Report")
83
+ report.add_section("Cleaning Summary", cleaning_summary)
84
+ report.add_section("EDA Insights", insights)
85
 
86
  for vis in visuals:
87
+ if "savefig('" in vis["code"]:
88
+ path = vis["code"].split("savefig('")[-1].split("')")[0]
89
+ if not path.startswith("/"):
90
+ path = os.path.join(charts_dir, os.path.basename(path))
91
+ report.add_plot(path, vis["description"])
92
 
93
  report.save()
94
+ with open(report_path, "rb") as f:
95
  st.download_button("⬇️ Download PDF Report", f, file_name="smart_data_report.pdf")