Spaces:

jzou19950715
/

Lossdog_Data_Science_Expert

Running

App Files Files Community

jzou19950715 committed on Jan 21

Commit

882008c

verified ·

1 Parent(s): 7a8e2c4

Update app.py

Browse files

Files changed (1) hide show

app.py +168 -193

app.py CHANGED Viewed

@@ -1,210 +1,185 @@
-# app.py
-import streamlit as st
-import google.generativeai as generativeai
 import os
-import re
-import json
-import logging
 import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-import seaborn as sns
 import matplotlib.pyplot as plt
-import numpy as np
 from io import StringIO
-def load_data(uploaded_file):
-    try:
-        df = pd.read_csv(uploaded_file)
-        return df
-    except Exception as e:
-        st.error(f"Error: {str(e)}")
-        return None
-def get_numeric_columns(df):
-    return df.select_dtypes(include=['float64', 'int64']).columns
-def get_categorical_columns(df):
-    return df.select_dtypes(include=['object', 'category']).columns
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[logging.StreamHandler()]
-)
-logger = logging.getLogger(__name__)
-def configure_gemini():
-    """Configure Google's Gemini AI model."""
     try:
-        from dotenv import load_dotenv
-        load_dotenv()
-        api_key = os.getenv("GOOGLE_API_KEY")
-        if not api_key:
-            st.error("Please set your GOOGLE_API_KEY in the .env file")
-            return None
-        generativeai.configure(api_key=api_key)
-        return generativeai.GenerativeModel('gemini-1.0-pro')
     except Exception as e:
-        st.error(f"Error configuring Gemini: {str(e)}")
-        return None
-def get_ai_visualization_suggestion(df, user_query):
-    """Get AI-powered visualization suggestions based on the data and user query."""
-    model = configure_gemini()
-    if not model:
-        return None
-    # Create a prompt for the AI
-    columns_info = {
-        'column_names': list(df.columns),
-        'data_types': {col: str(df[col].dtype) for col in df.columns},
-        'sample_values': {col: df[col].head().tolist() for col in df.columns}
-    }
-    prompt = f"""
-    Analyze this dataset and the user's query to suggest the best visualization approach:
-    User Query: {user_query}
-    Dataset Information:
-    {json.dumps(columns_info, indent=2)}
-    Please suggest:
-    1. The most appropriate type of visualization
-    2. Which columns should be used
-    3. Any data transformations needed
-    4. Visualization parameters (like color schemes, labels, etc.)
-    Format your response as JSON with the following structure:
-    {{
-        "viz_type": "type of visualization",
-        "columns": ["column1", "column2"],
-        "transformations": ["transformation1", "transformation2"],
-        "parameters": {{
-            "param1": "value1",
-            "param2": "value2"
-        }}
-    }}
-    """
     try:
-        response = model.generate_content(prompt)
-        # Extract JSON from response
-        suggestion = json.loads(response.text)
-        return suggestion
-    except Exception as e:
-        logger.error(f"Error getting AI suggestion: {str(e)}")
-        return None
-def main():
-    st.title("📊 AI-Powered Data Visualization Dashboard")
-    st.write("Upload your CSV file and explore the data through various visualizations!")
-    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
-    if uploaded_file is not None:
-        df = load_data(uploaded_file)
-        if df is not None:
-            st.success("File successfully loaded!")
-            # Basic Data Info
-            st.header("📝 Data Overview")
-            st.write(f"Number of rows: {df.shape[0]}")
-            st.write(f"Number of columns: {df.shape[1]}")
-            # Data Preview
-            st.subheader("Data Preview")
-            st.dataframe(df.head())
-            # Missing Values Analysis
-            st.subheader("Missing Values Analysis")
-            missing_data = df.isnull().sum()
-            if missing_data.sum() > 0:
-                st.write("Missing values by column:")
-                st.write(missing_data[missing_data > 0])
-            else:
-                st.write("No missing values found in the dataset!")
-            # User Query for AI Suggestions
-            st.header("🤖 AI-Powered Visualization")
-            user_query = st.text_input("Describe what you want to visualize",
-                                     "Show me trends in the data")
-            if st.button("Get AI Suggestion"):
-                with st.spinner("Getting AI visualization
-            viz_type = st.selectbox(
-                "Choose visualization type",
-                ["Scatter Plot", "Line Plot", "Bar Plot", "Histogram", "Box Plot", "Correlation Heatmap"]
-            )
-            numeric_columns = get_numeric_columns(df)
-            categorical_columns = get_categorical_columns(df)
-            if viz_type == "Scatter Plot" and len(numeric_columns) >= 2:
-                x_col = st.selectbox("Select X axis", numeric_columns)
-                y_col = st.selectbox("Select Y axis", numeric_columns)
-                color_col = st.selectbox("Select Color variable (optional)",
-                                       ["None"] + list(df.columns))
-                if color_col == "None":
-                    fig = px.scatter(df, x=x_col, y=y_col)
-                else:
-                    fig = px.scatter(df, x=x_col, y=y_col, color=color_col)
-                st.plotly_chart(fig)
-            elif viz_type == "Line Plot" and len(numeric_columns) >= 1:
-                x_col = st.selectbox("Select X axis", df.columns)
-                y_col = st.selectbox("Select Y axis", numeric_columns)
-                fig = px.line(df, x=x_col, y=y_col)
-                st.plotly_chart(fig)
-            elif viz_type == "Bar Plot":
-                x_col = st.selectbox("Select X axis", df.columns)
-                y_col = st.selectbox("Select Y axis", numeric_columns)
-                fig = px.bar(df, x=x_col, y=y_col)
-                st.plotly_chart(fig)
-            elif viz_type == "Histogram" and len(numeric_columns) >= 1:
-                col = st.selectbox("Select column", numeric_columns)
-                bins = st.slider("Number of bins", min_value=5, max_value=100, value=30)
-                fig = px.histogram(df, x=col, nbins=bins)
-                st.plotly_chart(fig)
-            elif viz_type == "Box Plot" and len(numeric_columns) >= 1:
-                y_col = st.selectbox("Select column for box plot", numeric_columns)
-                x_col = st.selectbox("Select grouping variable (optional)",
-                                   ["None"] + list(categorical_columns))
-                if x_col == "None":
-                    fig = px.box(df, y=y_col)
-                else:
-                    fig = px.box(df, x=x_col, y=y_col)
-                st.plotly_chart(fig)
-            elif viz_type == "Correlation Heatmap" and len(numeric_columns) >= 2:
-                corr_matrix = df[numeric_columns].corr()
-                fig = px.imshow(corr_matrix,
-                              labels=dict(color="Correlation"),
-                              x=corr_matrix.columns,
-                              y=corr_matrix.columns)
-                st.plotly_chart(fig)
-            # Data Summary
-            st.header("📊 Data Summary")
-            if len(numeric_columns) > 0:
-                st.subheader("Numerical Columns Summary")
-                st.write(df[numeric_columns].describe())
-            if len(categorical_columns) > 0:
-                st.subheader("Categorical Columns Summary")
-                for col in categorical_columns:
-                    st.write(f"\nValue counts for {col}:")
-                    st.write(df[col].value_counts())
 if __name__ == "__main__":
-    main()

 import os
 import pandas as pd
+import requests
+import json
+import subprocess
+import gradio as gr
+import tempfile
+import sys
 import matplotlib.pyplot as plt
 from io import StringIO
+def query_api(prompt, api_url, api_key, system_prompt, timeout=60):
+    """Send a chat-style prompt to an HTTP API and return the reply text.
+
+    Args:
+        prompt: Content of the user message.
+        api_url: Endpoint that receives the POSTed JSON payload.
+        api_key: Token sent as ``Authorization: Bearer <api_key>``.
+        system_prompt: Content of the system message placed before the
+            user message.
+        timeout: Seconds to wait for the server before giving up.
+
+    Returns:
+        The ``choices[0]["message"]["content"]`` value of the JSON response,
+        or a string starting with ``"API Error: "`` when the request fails
+        or the response does not have that shape.
+    """
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}"
+    }
+    payload = {
+        "messages": [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": prompt}
+        ],
+        "stream": False
+    }
+    try:
+        # Without a timeout, requests waits indefinitely on a stalled server.
+        response = requests.post(
+            api_url, headers=headers, json=payload, timeout=timeout
+        )
+        response.raise_for_status()
+        return response.json()["choices"][0]["message"]["content"]
+    except requests.exceptions.RequestException as e:
+        return f"API Error: {str(e)}"
+    except (KeyError, IndexError, TypeError, ValueError) as e:
+        # The body was not JSON or lacked choices[0].message.content; report
+        # it the same way as transport errors instead of raising.
+        return f"API Error: unexpected response format ({e!r})"
+def install_package(package):
+    """Run ``pip install <package>`` with the current interpreter.
+
+    Returns True when pip exits successfully and False when it exits with a
+    non-zero status.
+    """
+    pip_command = [sys.executable, "-m", "pip", "install", package]
+    try:
+        subprocess.check_call(pip_command)
+    except subprocess.CalledProcessError:
+        return False
+    return True
+def safe_execute_code(code, globals_dict=None):
+    """Execute Python source with ``exec`` and capture what it prints.
+
+    NOTE: despite the name, nothing here sandboxes the code. ``exec`` runs it
+    inside this process with whatever ``globals_dict`` exposes, so only pass
+    code you are willing to run with the application's own privileges.
+
+    Args:
+        code: Python source text to execute.
+        globals_dict: Namespace used as the globals of the executed code; a
+            fresh empty dict is used when omitted.
+
+    Returns:
+        ``(True, captured_stdout)`` when the code finishes, or
+        ``(False, "Error executing code: <message>")`` when it raises.
+    """
+    if globals_dict is None:
+        globals_dict = {}
+    # Swap sys.stdout for an in-memory buffer so print() output is captured.
+    old_stdout = sys.stdout
+    redirected_output = StringIO()
+    sys.stdout = redirected_output
     try:
+        exec(code, globals_dict)
+        return True, redirected_output.getvalue()
+    except (Exception, SystemExit) as e:
+        # SystemExit is not an Exception subclass; catching it keeps a stray
+        # sys.exit()/exit() in the executed code from escaping this handler.
+        return False, f"Error executing code: {str(e)}"
+    finally:
+        # Always restore the real stdout, even when the code raised.
+        sys.stdout = old_stdout
+def analyze_data(csv_file, api_url, api_key, system_prompt):
+    """Ask the API for analysis code for an uploaded CSV, run it, collect results.
+
+    Args:
+        csv_file: The uploaded file: either an object whose ``.name`` is the
+            path on disk, or the path string itself.
+        api_url: Forwarded to ``query_api``.
+        api_key: Forwarded to ``query_api``.
+        system_prompt: Forwarded to ``query_api``.
+
+    Returns:
+        A 3-tuple ``(status, generated_code, result)`` where ``result`` is
+        ``(stdout_text, image_path_or_None)`` on success, the error string
+        when the generated code failed, and ``None`` (together with
+        ``generated_code``) when no file was given, the API call failed, or
+        any other error occurred.
+    """
+    if not csv_file:
+        return "No file uploaded.", None, None
     try:
+        # Accept both a file-like upload object and a plain path string.
+        csv_path = getattr(csv_file, "name", csv_file)
+        df = pd.read_csv(csv_path)
+        columns = df.columns.tolist()
+        sample_data = df.head(3).to_dict()
+        # Each list item ends with a newline; the original adjacent literals
+        # ran together ("Include:1. Basic statistical analysis2. ...").
+        prompt = (
+            f"I have a CSV file with columns: {columns}. "
+            f"The first few rows are: {sample_data}. "
+            "Please generate Python code to analyze this data. Include:\n"
+            "1. Basic statistical analysis\n"
+            "2. Data visualization using matplotlib or seaborn\n"
+            "3. Any interesting patterns or insights\n"
+            "Make sure to use only standard data science libraries."
+        )
+        reply = query_api(prompt, api_url, api_key, system_prompt)
+        # query_api reports failures as an "API Error: ..." string; surface
+        # that as the status instead of trying to execute it as code.
+        if isinstance(reply, str) and reply.startswith("API Error:"):
+            return reply, None, None
+        generated_code = _extract_python_code(reply)
+        original_cwd = os.getcwd()
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Run inside a scratch directory so files written by the
+            # generated code are removed afterwards.
+            os.chdir(temp_dir)
+            try:
+                df.to_csv("input_data.csv", index=False)
+                globals_dict = {
+                    'pd': pd,
+                    'plt': plt,
+                    'df': df,
+                    '__file__': 'input_data.csv'
+                }
+                # SECURITY: this executes model-generated code in-process.
+                success, execution_output = safe_execute_code(generated_code, globals_dict)
+                if not success:
+                    return "Code execution failed.", generated_code, execution_output
+                image_path = None
+                if plt.get_fignums():
+                    # Save outside temp_dir: that directory is deleted when
+                    # this function returns, but the caller still has to read
+                    # the image from the returned path.
+                    fd, image_path = tempfile.mkstemp(suffix=".png")
+                    os.close(fd)
+                    plt.savefig(image_path)
+                return "Analysis completed successfully.", generated_code, (execution_output, image_path)
+            finally:
+                # Drop figures even on failure so they do not leak into the
+                # next run, and leave temp_dir before it is removed.
+                plt.close('all')
+                os.chdir(original_cwd)
+    except Exception as e:
+        return f"Error during analysis: {str(e)}", None, None
+def _extract_python_code(text):
+    """Return the body of the first ``` fenced block in text, or text itself."""
+    fence = "```"
+    start = text.find(fence)
+    if start == -1:
+        return text
+    # Skip the rest of the opening fence line (e.g. a "python" language tag).
+    body_start = text.find("\n", start)
+    if body_start == -1:
+        return text
+    end = text.find(fence, body_start)
+    if end == -1:
+        end = len(text)
+    return text[body_start + 1:end]
+# Create Gradio interface
+def create_interface():
+    """Build the Gradio Blocks UI for the analysis tool and return it.
+
+    The left column collects the API URL, API key, system prompt and CSV
+    upload; the right column shows the status, the generated code, the
+    captured text output and the visualization image. The returned Blocks
+    object is not launched here.
+    """
+    def run_analysis(csv_file, api_url, api_key, system_prompt):
+        # analyze_data returns (status, code, result) where result is None,
+        # an error string, or an (output_text, image_path) tuple. Gradio
+        # needs exactly one flat value per output component, so unpack it.
+        status, code, result = analyze_data(csv_file, api_url, api_key, system_prompt)
+        if isinstance(result, tuple):
+            text, image = result
+        else:
+            text, image = result, None
+        return status, code, text, image
+    with gr.Blocks() as interface:
+        gr.Markdown("# AI-Powered Data Analysis Tool")
+        with gr.Row():
+            with gr.Column():
+                api_url = gr.Textbox(
+                    label="API URL",
+                    placeholder="Enter your API endpoint URL",
+                    type="text"
+                )
+                api_key = gr.Textbox(
+                    label="API Key",
+                    placeholder="Enter your API key",
+                    type="password"
+                )
+                system_prompt = gr.Textbox(
+                    label="System Prompt",
+                    placeholder="Enter system prompt for the AI",
+                    value="You are an AI assistant specialized in data analysis, visualization, and Python programming.",
+                    lines=3
+                )
+                csv_file = gr.File(
+                    label="Upload CSV File",
+                    file_types=[".csv"]
+                )
+                analyze_button = gr.Button("Analyze Data")
+            with gr.Column():
+                status_output = gr.Textbox(label="Status")
+                code_output = gr.Code(
+                    label="Generated Code",
+                    language="python"
+                )
+                with gr.Row():
+                    text_output = gr.Textbox(
+                        label="Analysis Output",
+                        lines=10
+                    )
+                    image_output = gr.Image(
+                        label="Visualization",
+                        type="filepath"
+                    )
+        # outputs must be a flat list of components (no nested lists).
+        analyze_button.click(
+            fn=run_analysis,
+            inputs=[csv_file, api_url, api_key, system_prompt],
+            outputs=[status_output, code_output, text_output, image_output]
+        )
+        gr.Markdown("""
+        ## How to Use
+        1. Enter your API URL and key for the AI service you want to use (e.g., OpenAI, DeepSeek)
+        2. Customize the system prompt if desired
+        3. Upload a CSV file
+        4. Click 'Analyze Data' to generate and execute analysis code
+        The tool will generate Python code to analyze your data and create visualizations.
+        """)
+    return interface
 if __name__ == "__main__":
+    # Build the UI and start the Gradio server only when run as a script.
+    interface = create_interface()
+    interface.launch()