Spaces:

jzou19950715
/

Lossdog_Data_Science_Expert

Sleeping

App Files Community

jzou19950715 committed on Jan 21

Commit

fc29f53

verified ·

1 Parent(s): a1bec31

Update app.py

Browse files

Files changed (1) hide show

app.py +126 -93

app.py CHANGED Viewed

@@ -6,11 +6,81 @@ import subprocess
 import gradio as gr
 import tempfile
 import sys
-import matplotlib.pyplot as plt
 from io import StringIO
-def query_api(prompt, api_url, api_key, system_prompt):
-    """Send a prompt to the specified API and return the response."""
     headers = {
         "Content-Type": "application/json",
         "Authorization": f"Bearer {api_key}"
@@ -20,8 +90,7 @@ def query_api(prompt, api_url, api_key, system_prompt):
         "messages": [
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": prompt}
-        ],
-        "stream": False
     }
     try:
@@ -31,94 +100,55 @@ def query_api(prompt, api_url, api_key, system_prompt):
     except requests.exceptions.RequestException as e:
         return f"API Error: {str(e)}"
-def install_package(package):
-    """Install a Python package using pip."""
-    try:
-        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
-        return True
-    except subprocess.CalledProcessError:
-        return False
-def safe_execute_code(code, globals_dict=None):
-    """Safely execute the generated Python code in a restricted environment."""
-    if globals_dict is None:
-        globals_dict = {}
-    # Redirect stdout to capture print outputs
-    old_stdout = sys.stdout
-    redirected_output = StringIO()
-    sys.stdout = redirected_output
-    try:
-        # Execute the code in the restricted environment
-        exec(code, globals_dict)
-        output = redirected_output.getvalue()
-        return True, output
-    except Exception as e:
-        return False, f"Error executing code: {str(e)}"
-    finally:
-        sys.stdout = old_stdout
-def analyze_data(csv_file, api_url, api_key, system_prompt):
-    """Analyze the uploaded CSV file using the specified API."""
     if not csv_file:
-        return "No file uploaded.", None, None
     try:
         # Read the CSV file
         df = pd.read_csv(csv_file.name)
         columns = df.columns.tolist()
         sample_data = df.head(3).to_dict()
         # Build the prompt
-        prompt = (
-            f"I have a CSV file with columns: {columns}. "
-            f"The first few rows are: {sample_data}. "
-            "Please generate Python code to analyze this data. Include:"
-            "1. Basic statistical analysis"
-            "2. Data visualization using matplotlib or seaborn"
-            "3. Any interesting patterns or insights"
-            "Make sure to use only standard data science libraries."
-        )
-        # Get code from API
-        generated_code = query_api(prompt, api_url, api_key, system_prompt)
-        # Create a temporary directory for generated files
-        with tempfile.TemporaryDirectory() as temp_dir:
-            os.chdir(temp_dir)
-            # Save the DataFrame in the temporary directory
-            df.to_csv("input_data.csv", index=False)
-            # Prepare the execution environment
-            globals_dict = {
-                'pd': pd,
-                'plt': plt,
-                'df': df,
-                '__file__': 'input_data.csv'
-            }
-            # Execute the code
-            success, execution_output = safe_execute_code(generated_code, globals_dict)
-            if not success:
-                return "Code execution failed.", generated_code, execution_output
-            # Save any generated plots
-            if plt.get_figs():
-                plt.savefig("visualization.png")
-                plt.close('all')
-                if os.path.exists("visualization.png"):
-                    return "Analysis completed successfully.", generated_code, (execution_output, "visualization.png")
-            return "Analysis completed successfully.", generated_code, (execution_output, None)
     except Exception as e:
-        return f"Error during analysis: {str(e)}", None, None
-# Create Gradio interface
 def create_interface():
     with gr.Blocks() as interface:
         gr.Markdown("# AI-Powered Data Analysis Tool")
@@ -126,18 +156,18 @@ def create_interface():
             with gr.Column():
                 api_url = gr.Textbox(
                     label="API URL",
-                    placeholder="Enter your API endpoint URL",
                     type="text"
                 )
                 api_key = gr.Textbox(
                     label="API Key",
-                    placeholder="Enter your API key",
                     type="password"
                 )
                 system_prompt = gr.Textbox(
                     label="System Prompt",
                     placeholder="Enter system prompt for the AI",
-                    value="You are an AI assistant specialized in data analysis, visualization, and Python programming.",
                     lines=3
                 )
                 csv_file = gr.File(
@@ -152,30 +182,33 @@ def create_interface():
                     label="Generated Code",
                     language="python"
                 )
-                with gr.Row():
-                    text_output = gr.Textbox(
-                        label="Analysis Output",
-                        lines=10
-                    )
-                    image_output = gr.Image(
-                        label="Visualization",
-                        type="filepath"
-                    )
         analyze_button.click(
             fn=analyze_data,
             inputs=[csv_file, api_url, api_key, system_prompt],
-            outputs=[status_output, code_output, [text_output, image_output]]
         )
         gr.Markdown("""
         ## How to Use
-        1. Enter your API URL and key for the AI service you want to use (e.g., OpenAI, DeepSeek)
         2. Customize the system prompt if desired
-        3. Upload a CSV file
         4. Click 'Analyze Data' to generate and execute analysis code
-        The tool will generate Python code to analyze your data and create visualizations.
         """)
     return interface

 import gradio as gr
 import tempfile
 import sys
 from io import StringIO
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+from typing import Dict, Any, Tuple, Optional
+import ast
+# Safe imports list - mirrors smolagents approach
+SAFE_IMPORTS = [
+    "pandas", "numpy", "matplotlib", "seaborn", "sklearn",
+    "scipy", "statsmodels", "plotly", "math", "datetime",
+    "collections", "itertools", "functools", "operator"
+]
+class SafeExecutor:
+    """Safely executes Python code with restricted imports and environment"""
+    def __init__(self, allowed_imports=None):
+        self.allowed_imports = allowed_imports or SAFE_IMPORTS
+    def validate_imports(self, code: str) -> bool:
+        """Validate that all imports in the code are allowed"""
+        try:
+            tree = ast.parse(code)
+            for node in ast.walk(tree):
+                if isinstance(node, (ast.Import, ast.ImportFrom)):
+                    for name in node.names:
+                        module = name.name.split('.')[0]
+                        if module not in self.allowed_imports:
+                            raise ValueError(f"Import of '{module}' is not allowed. Allowed imports: {self.allowed_imports}")
+            return True
+        except Exception as e:
+            raise ValueError(f"Code validation error: {str(e)}")
+    def execute_code(self, code: str, globals_dict: Dict[str, Any] = None) -> Tuple[Any, str]:
+        """Execute code safely and return the output"""
+        if globals_dict is None:
+            globals_dict = {}
+        # Add safe imports to globals
+        for module in self.allowed_imports:
+            try:
+                globals_dict[module] = __import__(module)
+            except ImportError:
+                pass
+        # Redirect stdout to capture print outputs
+        old_stdout = sys.stdout
+        redirected_output = StringIO()
+        sys.stdout = redirected_output
+        try:
+            # Validate imports first
+            self.validate_imports(code)
+            # Execute the code
+            exec(code, globals_dict)
+            output = redirected_output.getvalue()
+            # Handle matplotlib figures
+            if plt.get_figs():
+                with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
+                    plt.savefig(tmp.name)
+                    plt.close('all')
+                    return tmp.name, output
+            return None, output
+        except Exception as e:
+            return None, f"Error executing code:\n{str(e)}"
+        finally:
+            sys.stdout = old_stdout
+def query_api(prompt: str, api_url: str, api_key: str, system_prompt: str) -> str:
+    """Send a prompt to the specified API and return the response"""
     headers = {
         "Content-Type": "application/json",
         "Authorization": f"Bearer {api_key}"
         "messages": [
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": prompt}
+        ]
     }
     try:
     except requests.exceptions.RequestException as e:
         return f"API Error: {str(e)}"
+def analyze_data(
+    csv_file: str,
+    api_url: str,
+    api_key: str,
+    system_prompt: str
+) -> Tuple[str, str, str, Optional[str]]:
+    """Analyze uploaded CSV data using the API and execute the generated code"""
     if not csv_file:
+        return "No file uploaded.", None, None, None
     try:
+        # Create safe executor
+        executor = SafeExecutor()
         # Read the CSV file
         df = pd.read_csv(csv_file.name)
         columns = df.columns.tolist()
         sample_data = df.head(3).to_dict()
         # Build the prompt
+        prompt = f"""Analyze this CSV file with columns: {columns}.
+Sample data: {sample_data}
+Generate Python code that:
+1. Creates insightful visualizations using matplotlib or seaborn
+2. Performs relevant statistical analysis
+3. Identifies key patterns or insights
+4. Properly handles potential data issues
+Important: Use only these libraries: {', '.join(SAFE_IMPORTS)}"""
+        # Get code from API
+        generated_code = query_api(prompt, api_url, api_key, system_prompt)
+        # Create execution environment
+        globals_dict = {'df': df, 'pd': pd, 'np': np, 'plt': plt, 'sns': sns}
+        # Execute the code
+        vis_path, execution_output = executor.execute_code(generated_code, globals_dict)
+        status = "Analysis completed successfully."
+        return status, generated_code, execution_output, vis_path
     except Exception as e:
+        return f"Error during analysis: {str(e)}", None, None, None
 def create_interface():
+    """Create the Gradio interface"""
     with gr.Blocks() as interface:
         gr.Markdown("# AI-Powered Data Analysis Tool")
             with gr.Column():
                 api_url = gr.Textbox(
                     label="API URL",
+                    placeholder="Enter API endpoint URL",
                     type="text"
                 )
                 api_key = gr.Textbox(
                     label="API Key",
+                    placeholder="Enter API key",
                     type="password"
                 )
                 system_prompt = gr.Textbox(
                     label="System Prompt",
                     placeholder="Enter system prompt for the AI",
+                    value="You are an AI assistant specialized in data analysis and visualization.",
                     lines=3
                 )
                 csv_file = gr.File(
                     label="Generated Code",
                     language="python"
                 )
+                execution_output = gr.Textbox(
+                    label="Execution Output",
+                    lines=10
+                )
+                visualization_output = gr.Image(
+                    label="Visualization",
+                    type="filepath"
+                )
         analyze_button.click(
             fn=analyze_data,
             inputs=[csv_file, api_url, api_key, system_prompt],
+            outputs=[status_output, code_output, execution_output, visualization_output]
         )
         gr.Markdown("""
         ## How to Use
+        1. Enter your API URL and key (supports various API providers)
         2. Customize the system prompt if desired
+        3. Upload a CSV file for analysis
         4. Click 'Analyze Data' to generate and execute analysis code
+        The tool will:
+        - Generate Python code to analyze your data
+        - Execute the code safely in a controlled environment
+        - Display both textual results and visualizations
+        - Support common data science libraries
         """)
     return interface