Spaces:
Sleeping
Sleeping
# main.py
#
# End-to-end driver script. It runs the following stages in order:
#   1. analyze the input CSV,
#   2. ask for a cleaning plan (plus a textual explanation),
#   3. execute the plan and write the cleaned CSV,
#   4. re-analyze the cleaned file and print a per-column summary,
#   5. generate textual insights,
#   6. generate and run a visualization plan,
#   7. assemble everything into a PDF report.
import re
import time

import pandas as pd

from analyze import analyze_csv
from execute import execute_plan
from insight import generate_insights
from plan import generate_cleaning_plan
from report import ReportBuilder
from run_viz_code import run_visualizations
from visual_insight import generate_visual_plan

start_time = time.time()
input_path = "adult.csv"
output_path = "output.csv"

# Step 1: Analyze
analysis = analyze_csv(input_path)
print("Analysis Complete")

# Step 2: Plan
cleaning_plan, explanation = generate_cleaning_plan(analysis)
print("\nCleaning Plan:")
for step in cleaning_plan:
    # Some plan steps arrive with the key misspelled as "colum"; move the
    # value under the expected "column" key. dict.pop() needs the key to
    # remove -- calling it with no argument raises TypeError.
    if "colum" in step and "column" not in step:
        step["column"] = step.pop("colum")
    print(step)
print("\nExplanation:")
print(explanation)

# Step 3: Execute
original_df = pd.read_csv(input_path)
cleaned_df = execute_plan(original_df, cleaning_plan)
cleaned_df.to_csv(output_path, index=False)
print(f"\nOutput saved to {output_path}")

# Step 4: Re-analyze cleaned file
cleaned_analysis = analyze_csv(output_path)
print("\nCleaned Data EDA Summary:")
for col, info in cleaned_analysis["columns"].items():
    print(f"{col} → type: {info['dtype']}, missing: {info['missing_pct']:.1f}%, unique: {info['unique_vals']}")

# Step 5: LLM derives human-style insights
print("\nData Insights:")
insights = generate_insights(cleaned_analysis["columns"])
print(insights)

# Step 6: Visualization Plan
print("\nGenerating visual plan from LLM...")
visual_plan = generate_visual_plan(cleaned_analysis["columns"])
run_visualizations(cleaned_df, visual_plan)
print("Visuals saved to charts/")

# Step 7: Build report
report = ReportBuilder("final_report.pdf")
report.add_title("Smart Data Cleaning & EDA Report")
report.add_section("Cleaning Summary", explanation)
report.add_section("EDA Summary", insights)
for viz in visual_plan:
    # Each visualization carries its plotting code as text; recover the image
    # path from the first plt.savefig("...") call found in that code.
    image_path = re.findall(r"plt\.savefig\(['\"](.*?)['\"]\)", viz["code"])
    # Visualizations whose code has no matching savefig call are left out of
    # the report.
    if image_path:
        report.add_plot(image_path[0], viz["description"])
report.save()
print("PDF report saved to final_report.pdf")

end_time = time.time()
duration = end_time - start_time
print(f"\nTotal Time: {duration:.2f} seconds")