Abdullah123456789 committed on
Commit 06bd2e9 · verified · 1 Parent(s): 5d088a5

Update app.py

Files changed (1)
  1. app.py +76 -91
app.py CHANGED
@@ -6,21 +6,21 @@ import sys
 # ---------------------------
 required_packages = [
     "pandas",
-    "scikit-learn"
+    "scikit-learn",
+    "streamlit"
 ]

 for package in required_packages:
     try:
         __import__(package.replace("-", "_"))
     except ImportError:
-        print(f"Installing missing package: {package}")
         subprocess.check_call([sys.executable, "-m", "pip", "install", package])

 # ---------------------------
 # Imports after ensuring installation
 # ---------------------------
+import streamlit as st
 import pandas as pd
-import argparse
 import os
 from sklearn.datasets import load_iris

@@ -33,7 +33,6 @@ from agents.visualization import VisualizationAgent
 from agents.hypothesis_testing import HypothesisTestingAgent
 from agents.report_generator import ReportGeneratorAgent

-
 # ---------------------------
 # Load sample dataset
 # ---------------------------
@@ -44,98 +43,84 @@ def load_sample_dataset():
     df['species'] = pd.Categorical(iris.target_names[iris.target])
     return df, 'dataframe'

-
 # ---------------------------
 # Main workflow
 # ---------------------------
-def main(data_source=None, file_format='csv', output_dir='outputs'):
-    """
-    Main function to run the AutoStatAgent workflow.
-
-    Args:
-        data_source: Path to dataset file or None for sample dataset
-        file_format: 'csv', 'excel', 'json', or 'dataframe'
-        output_dir: Directory for outputs (visualizations and report)
-    """
-    try:
-        # Load dataset
-        if data_source is None:
-            print("No dataset provided. Using sample Iris dataset.")
-            df, file_format = load_sample_dataset()
-        else:
-            df = data_source
-            if not isinstance(df, pd.DataFrame):
-                if not os.path.exists(data_source):
-                    raise FileNotFoundError(f"Dataset file not found: {data_source}")
-
-        print("\n=== AutoStatAgent Workflow ===")
-
-        # Step 1: Data Profiling
-        profiler = DataProfilerAgent(df, file_format)
-        profile = profiler.profile()
-        print("\nDataset Profile:")
-        print(f"Shape: {profile['shape']}")
-        print(f"Columns: {profile['columns']}")
-        print(f"Missing Values: {profile['missing_values']}")
-        print(f"Duplicate Rows: {profile['duplicate_rows']}")
-        print("\nVariable Types:")
-        for var_type, cols in profile['variable_types'].items():
-            print(f"{var_type.capitalize()}: {cols}")
-
-        # Step 2: Question Generation
-        question_generator = QuestionGeneratorAgent(df, profile['variable_types'])
-        questions = question_generator.generate_questions()
-
-        # Step 3: Exploratory Data Analysis
-        eda_agent = EDAAgent(df, file_format, output_dir=output_dir)
-        eda_results = eda_agent.perform_eda()
-
-        # Step 4: Answer Generation
-        answer_agent = AnswerGeneratorAgent(df, profile['variable_types'])
-        answers = answer_agent.answer_questions(questions)
-
-        # Step 5: Visualizations
-        vis_agent = VisualizationAgent(df, profile['variable_types'], output_dir=output_dir)
-        vis_paths = vis_agent.generate_visualizations()
-
-        # Step 6: Hypothesis Testing
-        hypothesis_agent = HypothesisTestingAgent(df, profile['variable_types'])
-        test_results = hypothesis_agent.perform_tests(questions)
-
-        # Step 7: Report Generation
-        report_agent = ReportGeneratorAgent(output_dir=output_dir)
-        report_path = report_agent.generate_report(eda_results, answers, test_results, vis_paths)
-
-        print("\n=== Workflow Complete ===")
-        print(f"Output directory: {output_dir}")
-        print(f"Report template saved at: {report_path}")
-
-        return {
-            'profile': profile,
-            'questions': questions,
-            'eda_results': eda_results,
-            'answers': answers,
-            'visualizations': vis_paths,
-            'test_results': test_results,
-            'report_path': report_path
-        }
-
-    except Exception as e:
-        print(f"Error in workflow: {str(e)}")
-        raise
-
+def run_autostatagent(df, file_format='csv', output_dir='outputs'):
+    profile = DataProfilerAgent(df, file_format).profile()
+    questions = QuestionGeneratorAgent(df, profile['variable_types'], use_api=False).generate_questions()
+    eda_results = EDAAgent(df, file_format, output_dir=output_dir).perform_eda()
+    answers = AnswerGeneratorAgent(df, profile['variable_types']).answer_questions(questions)
+    vis_paths = VisualizationAgent(df, profile['variable_types'], output_dir=output_dir).generate_visualizations()
+    test_results = HypothesisTestingAgent(df, profile['variable_types']).perform_tests(questions)
+    report_path = ReportGeneratorAgent(output_dir=output_dir).generate_report(
+        eda_results, answers, test_results, vis_paths
+    )
+    return {
+        'profile': profile,
+        'questions': questions,
+        'eda_results': eda_results,
+        'answers': answers,
+        'visualizations': vis_paths,
+        'test_results': test_results,
+        'report_path': report_path
+    }

 # ---------------------------
-# Entry point
+# Streamlit UI
 # ---------------------------
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="AutoStatAgent: Automated Data Analysis")
-    parser.add_argument('--file', type=str, help='Path to dataset file (CSV, Excel, JSON)', default=None)
-    parser.add_argument('--format', type=str, choices=['csv', 'excel', 'json'], default='csv',
-                        help='File format (csv, excel, json)')
-    parser.add_argument('--output-dir', type=str, default='outputs',
-                        help='Output directory for visualizations and report')
+st.title("📊 AutoStatAgent - Automated Data Analysis")
+st.write("Upload your dataset or use the sample Iris dataset for automatic profiling, EDA, visualization, and reporting.")
+
+uploaded_file = st.file_uploader("Upload CSV, Excel, or JSON file", type=["csv", "xlsx", "json"])
+use_sample = st.checkbox("Use sample Iris dataset instead")
+
+if uploaded_file or use_sample:
+    if use_sample:
+        df, file_format = load_sample_dataset()
+    else:
+        if uploaded_file.name.endswith(".csv"):
+            df = pd.read_csv(uploaded_file)
+            file_format = "csv"
+        elif uploaded_file.name.endswith(".xlsx"):
+            df = pd.read_excel(uploaded_file)
+            file_format = "excel"
+        elif uploaded_file.name.endswith(".json"):
+            df = pd.read_json(uploaded_file)
+            file_format = "json"
+        else:
+            st.error("Unsupported file format.")
+            st.stop()
+
+    st.subheader("Preview of Data")
+    st.dataframe(df.head())
+
+    if st.button("Run Analysis"):
+        with st.spinner("Running AutoStatAgent workflow..."):
+            results = run_autostatagent(df, file_format=file_format)
+
+        st.success("✅ Analysis Complete")
+
+        st.subheader("Dataset Profile")
+        st.json(results['profile'])
+
+        st.subheader("Generated Questions")
+        st.write(results['questions'])
+
+        st.subheader("EDA Results")
+        st.write(results['eda_results'])
+
+        st.subheader("Answers to Questions")
+        st.write(results['answers'])
+
+        st.subheader("Hypothesis Testing Results")
+        st.write(results['test_results'])

-    args = parser.parse_args()
+        st.subheader("Visualizations")
+        for img in results['visualizations']:
+            st.image(img)

-    main(data_source=args.file, file_format=args.format, output_dir=args.output_dir)
+        st.subheader("Report")
+        st.write(f"Report saved at: {results['report_path']}")
+else:
+    st.info("Upload a dataset or check 'Use sample Iris dataset' to begin.")
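
Note: with the argparse entry point removed, the app is no longer started as "python app.py --file ...". As a Streamlit script it is launched with "streamlit run app.py". For a quick headless check of the same pipeline, a minimal sketch (hypothetical, not part of this commit; it assumes the agents package in this Space is importable and that importing app outside "streamlit run" is acceptable despite its top-level UI calls):

    # Hypothetical smoke test: drive the pipeline without the Streamlit UI.
    # Importing app also executes its top-level st.* calls, which Streamlit
    # typically tolerates outside "streamlit run" (warnings only).
    from app import load_sample_dataset, run_autostatagent

    df, file_format = load_sample_dataset()   # sample Iris data, format 'dataframe'
    results = run_autostatagent(df, file_format=file_format, output_dir="outputs")
    print(results["report_path"])             # location of the generated report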