Spaces:

Abdullah123456789
/

hafiz-auto-agent

Sleeping

App Files Files Community

Abdullah123456789 committed 8 days ago

Commit

06bd2e9

verified ·

1 Parent(s): 5d088a5

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -91

app.py CHANGED Viewed

@@ -6,21 +6,21 @@ import sys
 # ---------------------------
 # pip distribution names probed (and installed if missing) before the
 # imports further down run.
 required_packages = [
     "pandas",
-    "scikit-learn"
 ]
 for package in required_packages:
     try:
         # NOTE(review): "scikit-learn" becomes "scikit_learn" here, while this
         # file imports the library as `sklearn`; the probe looks like it fails
         # for that entry and falls through to pip every time - confirm.
         __import__(package.replace("-", "_"))
     except ImportError:
-        print(f"Installing missing package: {package}")
         # Install with the same interpreter that is running this script.
         subprocess.check_call([sys.executable, "-m", "pip", "install", package])
 # ---------------------------
 # Imports after ensuring installation
 # ---------------------------
 import pandas as pd
-import argparse
 import os
 from sklearn.datasets import load_iris
@@ -33,7 +33,6 @@ from agents.visualization import VisualizationAgent
 from agents.hypothesis_testing import HypothesisTestingAgent
 from agents.report_generator import ReportGeneratorAgent
 # ---------------------------
 # Load sample dataset
 # ---------------------------
@@ -44,98 +43,84 @@ def load_sample_dataset():
     df['species'] = pd.Categorical(iris.target_names[iris.target])
     return df, 'dataframe'
 # ---------------------------
 # Main workflow
 # ---------------------------
-def main(data_source=None, file_format='csv', output_dir='outputs'):
-    """
-    Main function to run the AutoStatAgent workflow.
-    Args:
-        data_source: Path to dataset file, an already-loaded pandas
-            DataFrame, or None for the sample dataset
-        file_format: 'csv', 'excel', 'json', or 'dataframe'
-        output_dir: Directory for outputs (visualizations and report)
-    Returns:
-        dict with the keys 'profile', 'questions', 'eda_results', 'answers',
-        'visualizations', 'test_results' and 'report_path'.
-    Raises:
-        FileNotFoundError: data_source is neither None nor a DataFrame and
-            does not exist on disk.
-        Exception: any error raised inside the workflow is printed and then
-            re-raised unchanged.
-    """
-    try:
-        # Load dataset
-        if data_source is None:
-            print("No dataset provided. Using sample Iris dataset.")
-            # The sample loader also supplies the file_format to use.
-            df, file_format = load_sample_dataset()
-        else:
-            # df is the caller's value as given; a non-DataFrame value is only
-            # checked for existence here and is not read in this function.
-            # NOTE(review): presumably the agents load the path themselves
-            # using file_format - verify against the agent classes.
-            df = data_source
-            if not isinstance(df, pd.DataFrame):
-                if not os.path.exists(data_source):
-                    raise FileNotFoundError(f"Dataset file not found: {data_source}")
-        print("\n=== AutoStatAgent Workflow ===")
-        # Step 1: Data Profiling
-        profiler = DataProfilerAgent(df, file_format)
-        profile = profiler.profile()
-        print("\nDataset Profile:")
-        print(f"Shape: {profile['shape']}")
-        print(f"Columns: {profile['columns']}")
-        print(f"Missing Values: {profile['missing_values']}")
-        print(f"Duplicate Rows: {profile['duplicate_rows']}")
-        print("\nVariable Types:")
-        for var_type, cols in profile['variable_types'].items():
-            print(f"{var_type.capitalize()}: {cols}")
-        # Step 2: Question Generation
-        question_generator = QuestionGeneratorAgent(df, profile['variable_types'])
-        questions = question_generator.generate_questions()
-        # Step 3: Exploratory Data Analysis
-        eda_agent = EDAAgent(df, file_format, output_dir=output_dir)
-        eda_results = eda_agent.perform_eda()
-        # Step 4: Answer Generation
-        answer_agent = AnswerGeneratorAgent(df, profile['variable_types'])
-        answers = answer_agent.answer_questions(questions)
-        # Step 5: Visualizations
-        vis_agent = VisualizationAgent(df, profile['variable_types'], output_dir=output_dir)
-        vis_paths = vis_agent.generate_visualizations()
-        # Step 6: Hypothesis Testing
-        hypothesis_agent = HypothesisTestingAgent(df, profile['variable_types'])
-        test_results = hypothesis_agent.perform_tests(questions)
-        # Step 7: Report Generation
-        report_agent = ReportGeneratorAgent(output_dir=output_dir)
-        report_path = report_agent.generate_report(eda_results, answers, test_results, vis_paths)
-        print("\n=== Workflow Complete ===")
-        print(f"Output directory: {output_dir}")
-        print(f"Report template saved at: {report_path}")
-        # Hand every intermediate result back to the caller.
-        return {
-            'profile': profile,
-            'questions': questions,
-            'eda_results': eda_results,
-            'answers': answers,
-            'visualizations': vis_paths,
-            'test_results': test_results,
-            'report_path': report_path
-        }
-    except Exception as e:
-        # Report the failure on stdout, then let the original exception propagate.
-        print(f"Error in workflow: {str(e)}")
-        raise
 # ---------------------------
-# Entry point
 # ---------------------------
-if __name__ == "__main__":
-    # Command-line entry: --file, --format and --output-dir are forwarded to
-    # main() as data_source, file_format and output_dir respectively.
-    parser = argparse.ArgumentParser(description="AutoStatAgent: Automated Data Analysis")
-    # With no --file, data_source is None and main() uses the sample dataset.
-    parser.add_argument('--file', type=str, help='Path to dataset file (CSV, Excel, JSON)', default=None)
-    parser.add_argument('--format', type=str, choices=['csv', 'excel', 'json'], default='csv',
-                        help='File format (csv, excel, json)')
-    parser.add_argument('--output-dir', type=str, default='outputs',
-                        help='Output directory for visualizations and report')
-    args = parser.parse_args()
-    main(data_source=args.file, file_format=args.format, output_dir=args.output_dir)

 # ---------------------------
 required_packages = [
     "pandas",
+    "scikit-learn",
+    "streamlit"
 ]
 for package in required_packages:
     try:
         __import__(package.replace("-", "_"))
     except ImportError:
         subprocess.check_call([sys.executable, "-m", "pip", "install", package])
 # ---------------------------
 # Imports after ensuring installation
 # ---------------------------
+import streamlit as st
 import pandas as pd
 import os
 from sklearn.datasets import load_iris
 from agents.hypothesis_testing import HypothesisTestingAgent
 from agents.report_generator import ReportGeneratorAgent
 # ---------------------------
 # Load sample dataset
 # ---------------------------
     df['species'] = pd.Categorical(iris.target_names[iris.target])
     return df, 'dataframe'
 # ---------------------------
 # Main workflow
 # ---------------------------
+def run_autostatagent(df, file_format='csv', output_dir='outputs'):
+    """Run each AutoStatAgent stage in order and collect what it produced.
+
+    Args:
+        df: Dataset handed to every agent.
+        file_format: Format label passed to the profiler and EDA agents.
+        output_dir: Directory passed to the EDA, visualization and report agents.
+
+    Returns:
+        dict with the keys 'profile', 'questions', 'eda_results', 'answers',
+        'visualizations', 'test_results' and 'report_path'.
+    """
+    # Profiling comes first: the variable types it reports feed later stages.
+    profiler = DataProfilerAgent(df, file_format)
+    profile = profiler.profile()
+    variable_types = profile['variable_types']
+
+    question_agent = QuestionGeneratorAgent(df, variable_types, use_api=False)
+    questions = question_agent.generate_questions()
+
+    eda_agent = EDAAgent(df, file_format, output_dir=output_dir)
+    eda_results = eda_agent.perform_eda()
+
+    answer_agent = AnswerGeneratorAgent(df, variable_types)
+    answers = answer_agent.answer_questions(questions)
+
+    vis_agent = VisualizationAgent(df, variable_types, output_dir=output_dir)
+    vis_paths = vis_agent.generate_visualizations()
+
+    hypothesis_agent = HypothesisTestingAgent(df, variable_types)
+    test_results = hypothesis_agent.perform_tests(questions)
+
+    report_agent = ReportGeneratorAgent(output_dir=output_dir)
+    report_path = report_agent.generate_report(
+        eda_results, answers, test_results, vis_paths
+    )
+
+    results = {}
+    results['profile'] = profile
+    results['questions'] = questions
+    results['eda_results'] = eda_results
+    results['answers'] = answers
+    results['visualizations'] = vis_paths
+    results['test_results'] = test_results
+    results['report_path'] = report_path
+    return results
 # ---------------------------
+# Streamlit UI
 # ---------------------------
+# Streamlit re-runs this script top to bottom on every widget interaction, so
+# the page is rebuilt from the current widget values each time.
+st.title("📊 AutoStatAgent - Automated Data Analysis")
+st.write("Upload your dataset or use the sample Iris dataset for automatic profiling, EDA, visualization, and reporting.")
+uploaded_file = st.file_uploader("Upload CSV, Excel, or JSON file", type=["csv", "xlsx", "json"])
+use_sample = st.checkbox("Use sample Iris dataset instead")
+if uploaded_file or use_sample:
+    if use_sample:
+        # The checkbox takes precedence even when a file was also uploaded.
+        df, file_format = load_sample_dataset()
+    else:
+        # Match the extension case-insensitively so "DATA.CSV" is handled the
+        # same way as "data.csv" instead of being reported as unsupported.
+        filename = uploaded_file.name.lower()
+        if filename.endswith(".csv"):
+            reader, file_format = pd.read_csv, "csv"
+        elif filename.endswith(".xlsx"):
+            reader, file_format = pd.read_excel, "excel"
+        elif filename.endswith(".json"):
+            reader, file_format = pd.read_json, "json"
+        else:
+            st.error("Unsupported file format.")
+            st.stop()
+        try:
+            df = reader(uploaded_file)
+        except (ValueError, ImportError) as exc:
+            # ValueError covers pandas parse/empty-file errors and bad
+            # encodings; ImportError covers a missing optional reader engine.
+            st.error(f"Could not read {uploaded_file.name}: {exc}")
+            st.stop()
+    st.subheader("Preview of Data")
+    st.dataframe(df.head())
+    if st.button("Run Analysis"):
+        with st.spinner("Running AutoStatAgent workflow..."):
+            results = run_autostatagent(df, file_format=file_format)
+        st.success("✅ Analysis Complete")
+        st.subheader("Dataset Profile")
+        st.json(results['profile'])
+        st.subheader("Generated Questions")
+        st.write(results['questions'])
+        st.subheader("EDA Results")
+        st.write(results['eda_results'])
+        st.subheader("Answers to Questions")
+        st.write(results['answers'])
+        st.subheader("Hypothesis Testing Results")
+        st.write(results['test_results'])
+        st.subheader("Visualizations")
+        for img in results['visualizations']:
+            st.image(img)
+        st.subheader("Report")
+        st.write(f"Report saved at: {results['report_path']}")
+else:
+    st.info("Upload a dataset or check 'Use sample Iris dataset' to begin.")