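# NOTE: the next three lines are page-header text captured with the source,
# kept here as comments so the file parses as Python.
# Spaces:
# Build error
# Build error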
import ast
import json
import os
import re
import subprocess
import sys
import tempfile
from contextlib import redirect_stdout
from io import StringIO
from typing import Dict, Any, Tuple, Optional

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
# Allow-list of top-level module names that generated code may import.
# The original comment notes this mirrors the smolagents approach.
# Kept as a list (not a set/tuple) because it is interpolated into error
# messages and joined into the LLM prompt, so order and repr are visible.
SAFE_IMPORTS = [
    "pandas", "numpy", "matplotlib", "seaborn", "sklearn",
    "scipy", "statsmodels", "plotly", "math", "datetime",
    "collections", "itertools", "functools", "operator",
]
class SafeExecutor:
    """Execute generated Python code after screening its import statements.

    Only ``import`` / ``from ... import`` statements are checked against an
    allow-list. The code itself runs through ``exec`` inside this process.

    SECURITY NOTE(review): this is not a sandbox. Allowed code can still reach
    builtins such as ``open`` or ``__import__``; run untrusted code in an
    isolated process/container if that matters for the deployment.
    """

    def __init__(self, allowed_imports=None):
        """Store the allow-list; ``None`` or an empty list selects SAFE_IMPORTS."""
        self.allowed_imports = allowed_imports or SAFE_IMPORTS

    def validate_imports(self, code: str) -> bool:
        """Check that every import in *code* targets an allowed module.

        Args:
            code: Python source text to inspect (it is parsed, not executed).

        Returns:
            True when all imports are allowed.

        Raises:
            ValueError: if the code cannot be parsed, uses a relative import,
                or imports a module whose top-level package is not in
                ``self.allowed_imports``. Messages are prefixed with
                "Code validation error: ".
        """
        try:
            tree = ast.parse(code)
        except (SyntaxError, ValueError) as e:
            raise ValueError(f"Code validation error: {str(e)}") from e

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                # `import a.b, c` -> each alias names a module path.
                module_paths = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom):
                # For `from X import Y` the module is X (node.module); the
                # aliases are just the names pulled out of it.
                if node.level or not node.module:
                    raise ValueError(
                        "Code validation error: Relative imports are not allowed."
                    )
                module_paths = [node.module]
            else:
                continue

            for path in module_paths:
                module = path.split('.')[0]
                if module not in self.allowed_imports:
                    raise ValueError(
                        f"Code validation error: Import of '{module}' is not allowed. "
                        f"Allowed imports: {self.allowed_imports}"
                    )
        return True

    def execute_code(self, code: str, globals_dict: Dict[str, Any] = None) -> Tuple[Any, str]:
        """Validate and run *code*, capturing printed output and any figure.

        Args:
            code: Python source text to run.
            globals_dict: Namespace the code runs in. It is mutated in place:
                every importable module from the allow-list is added under
                its own name, plus whatever the code defines.

        Returns:
            ``(image_path, output)``. ``image_path`` is the path of a PNG
            holding the current matplotlib figure, or ``None`` when no figure
            is open. ``output`` is the captured stdout, or a message starting
            with "Error executing code:" when validation or execution raised.
        """
        if globals_dict is None:
            globals_dict = {}

        # Pre-load the allowed modules so the code can use them without importing.
        for module in self.allowed_imports:
            try:
                globals_dict[module] = __import__(module)
            except ImportError:
                # Best effort: an allowed library that is not installed is
                # simply left out of the namespace.
                continue

        captured = StringIO()
        try:
            with redirect_stdout(captured):
                self.validate_imports(code)
                # SECURITY: executes model-generated code in-process (see class note).
                exec(code, globals_dict)
            output = captured.getvalue()

            if plt.get_fignums():
                # Reserve a path, then close the handle before matplotlib
                # writes to it (an open handle blocks the write on Windows).
                with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
                    image_path = tmp.name
                # Saves only the current figure, as the original did.
                plt.savefig(image_path)
                return image_path, output
            return None, output
        except Exception as e:
            return None, f"Error executing code:\n{str(e)}"
        finally:
            # Always drop open figures so a failed run cannot leak its
            # figures into the next call.
            plt.close('all')
def query_api(
    prompt: str,
    api_url: str,
    api_key: str,
    system_prompt: str,
    timeout: float = 60.0,
) -> str:
    """Send a chat-style request to *api_url* and return the reply text.

    Args:
        prompt: User message content.
        api_url: Full endpoint URL the JSON payload is POSTed to.
        api_key: Token sent as ``Authorization: Bearer <api_key>``.
        system_prompt: System message content.
        timeout: Seconds to wait for the server before giving up, so a stalled
            endpoint cannot hang the caller indefinitely.

    Returns:
        The string at ``choices[0].message.content`` of the JSON response, or
        a string starting with "API Error:" when the request fails or the
        response does not have that shape.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    # NOTE(review): no "model" field is sent; some providers may require one.
    payload = {
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    }
    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.RequestException as e:
        return f"API Error: {str(e)}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Body was not JSON or lacked choices[0].message.content.
        return f"API Error: unexpected response format ({e!r})"
# Matches the first Markdown fenced block: opening fence with an optional
# info string (e.g. "python"), then the body up to the next fence.
_CODE_FENCE_RE = re.compile(r"```[^\n]*\n(.*?)```", re.DOTALL)


def _extract_python_code(response: str) -> str:
    """Return the body of the first fenced code block, or *response* unchanged if unfenced."""
    match = _CODE_FENCE_RE.search(response)
    if match is None:
        return response
    return match.group(1).strip("\n")


def analyze_data(
    csv_file: str,
    api_url: str,
    api_key: str,
    system_prompt: str
) -> Tuple[str, str, str, Optional[str]]:
    """Ask the API for analysis code for an uploaded CSV and run that code.

    Args:
        csv_file: The uploaded file, either a path string or an object whose
            ``name`` attribute is the path.
        api_url: Endpoint passed through to ``query_api``.
        api_key: API key passed through to ``query_api``.
        system_prompt: System prompt passed through to ``query_api``.

    Returns:
        ``(status, generated_code, execution_output, image_path)``. On any
        failure before execution the last three items are ``None`` and
        ``status`` carries the error text.
    """
    if not csv_file:
        return "No file uploaded.", None, None, None

    try:
        executor = SafeExecutor()

        # Accept both a plain path and a file wrapper exposing `.name`.
        csv_path = getattr(csv_file, "name", csv_file)
        df = pd.read_csv(csv_path)
        columns = df.columns.tolist()
        sample_data = df.head(3).to_dict()

        prompt = (
            f"Analyze this CSV file with columns: {columns}.\n"
            f"Sample data: {sample_data}\n"
            "The data is already loaded into a pandas DataFrame named `df` "
            "(with pd, np, plt and sns available); do not read any files.\n"
            "Generate Python code that:\n"
            "1. Creates insightful visualizations using matplotlib or seaborn\n"
            "2. Performs relevant statistical analysis\n"
            "3. Identifies key patterns or insights\n"
            "4. Properly handles potential data issues\n"
            f"Important: Use only these libraries: {', '.join(SAFE_IMPORTS)}"
        )

        api_response = query_api(prompt, api_url, api_key, system_prompt)
        # query_api reports failures as text; never try to execute that text.
        if api_response.startswith("API Error:"):
            return api_response, None, None, None

        # Replies are often wrapped in Markdown fences, which are not valid Python.
        generated_code = _extract_python_code(api_response)

        # Namespace for the generated code: the data plus the usual aliases.
        globals_dict = {'df': df, 'pd': pd, 'np': np, 'plt': plt, 'sns': sns}
        vis_path, execution_output = executor.execute_code(generated_code, globals_dict)

        # execute_code signals failure through its output text, not an exception.
        if vis_path is None and execution_output.startswith("Error executing code:"):
            status = "Analysis failed while executing the generated code."
        else:
            status = "Analysis completed successfully."
        return status, generated_code, execution_output, vis_path
    except Exception as e:
        # UI boundary: surface any failure as a status message.
        return f"Error during analysis: {str(e)}", None, None, None
def create_interface():
    """Build and return the Gradio Blocks UI (it is not launched here).

    Layout: a left column with the API settings, CSV upload and the analyze
    button; a right column with status, generated code, execution output and
    the visualization; usage notes below. The button calls ``analyze_data``.
    """
    # Built from explicit lines so the Markdown does not depend on source
    # indentation. The blank line before "The tool will:" keeps that
    # paragraph from being absorbed into list item 4.
    help_text = (
        "\n"
        "## How to Use\n"
        "1. Enter your API URL and key (supports various API providers)\n"
        "2. Customize the system prompt if desired\n"
        "3. Upload a CSV file for analysis\n"
        "4. Click 'Analyze Data' to generate and execute analysis code\n"
        "\n"
        "The tool will:\n"
        "- Generate Python code to analyze your data\n"
        "- Execute the code safely in a controlled environment\n"
        "- Display both textual results and visualizations\n"
        "- Support common data science libraries\n"
    )

    with gr.Blocks() as interface:
        gr.Markdown("# AI-Powered Data Analysis Tool")

        with gr.Row():
            # Left column: inputs.
            with gr.Column():
                api_url = gr.Textbox(
                    label="API URL",
                    placeholder="Enter API endpoint URL",
                    type="text"
                )
                api_key = gr.Textbox(
                    label="API Key",
                    placeholder="Enter API key",
                    type="password"
                )
                system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system prompt for the AI",
                    value="You are an AI assistant specialized in data analysis and visualization.",
                    lines=3
                )
                csv_file = gr.File(
                    label="Upload CSV File",
                    file_types=[".csv"]
                )
                analyze_button = gr.Button("Analyze Data")

            # Right column: results.
            with gr.Column():
                status_output = gr.Textbox(label="Status")
                code_output = gr.Code(
                    label="Generated Code",
                    language="python"
                )
                execution_output = gr.Textbox(
                    label="Execution Output",
                    lines=10
                )
                visualization_output = gr.Image(
                    label="Visualization",
                    type="filepath"
                )

        # Output order must match the 4-tuple returned by analyze_data.
        analyze_button.click(
            fn=analyze_data,
            inputs=[csv_file, api_url, api_key, system_prompt],
            outputs=[status_output, code_output, execution_output, visualization_output]
        )

        gr.Markdown(help_text)

    return interface
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script,
    # so importing this module has no side effects.
    interface = create_interface()
    interface.launch()