Spaces:

jzou19950715
/

Lossdog_Data_Science_Expert

Running

App Files Files Community

jzou19950715 committed on Jan 21

Commit

7cdbd20

verified ·

1 Parent(s): 40864e7

Update app.py

Browse files

Files changed (1) hide show

app.py +173 -149

app.py CHANGED Viewed

@@ -8,207 +8,231 @@ import tempfile
 import sys
 from io import StringIO
 import matplotlib.pyplot as plt
-import seaborn as sns
-import numpy as np
-from typing import Dict, Any, Tuple, Optional
-import ast
-# Safe imports list - mirrors smolagents approach
-SAFE_IMPORTS = [
-    "pandas", "numpy", "matplotlib", "seaborn", "sklearn",
-    "scipy", "statsmodels", "plotly", "math", "datetime",
-    "collections", "itertools", "functools", "operator"
-]
-class SafeExecutor:
-    """Safely executes Python code with restricted imports and environment"""
-    def __init__(self, allowed_imports=None):
-        self.allowed_imports = allowed_imports or SAFE_IMPORTS
-    def validate_imports(self, code: str) -> bool:
-        """Validate that all imports in the code are allowed"""
-        try:
-            tree = ast.parse(code)
-            for node in ast.walk(tree):
-                if isinstance(node, (ast.Import, ast.ImportFrom)):
-                    for name in node.names:
-                        module = name.name.split('.')[0]
-                        if module not in self.allowed_imports:
-                            raise ValueError(f"Import of '{module}' is not allowed. Allowed imports: {self.allowed_imports}")
-            return True
-        except Exception as e:
-            raise ValueError(f"Code validation error: {str(e)}")
-    def execute_code(self, code: str, globals_dict: Dict[str, Any] = None) -> Tuple[Any, str]:
-        """Execute code safely and return the output"""
-        if globals_dict is None:
-            globals_dict = {}
-        # Add safe imports to globals
-        for module in self.allowed_imports:
-            try:
-                globals_dict[module] = __import__(module)
-            except ImportError:
-                pass
-        # Redirect stdout to capture print outputs
-        old_stdout = sys.stdout
-        redirected_output = StringIO()
-        sys.stdout = redirected_output
-        try:
-            # Validate imports first
-            self.validate_imports(code)
-            # Execute the code
-            exec(code, globals_dict)
-            output = redirected_output.getvalue()
-            # Handle matplotlib figures
-            if plt.get_figs():
                 with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
-                    plt.savefig(tmp.name)
-                    plt.close('all')
-                    return tmp.name, output
-            return None, output
-        except Exception as e:
-            return None, f"Error executing code:\n{str(e)}"
-        finally:
-            sys.stdout = old_stdout
-def query_api(prompt: str, api_url: str, api_key: str, system_prompt: str) -> str:
-    """Send a prompt to the specified API and return the response"""
     headers = {
         "Content-Type": "application/json",
         "Authorization": f"Bearer {api_key}"
     }
     payload = {
-        "messages": [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": prompt}
-        ]
     }
     try:
-        response = requests.post(api_url, headers=headers, json=payload)
         response.raise_for_status()
         return response.json()["choices"][0]["message"]["content"]
-    except requests.exceptions.RequestException as e:
         return f"API Error: {str(e)}"
-def analyze_data(
-    csv_file: str,
-    api_url: str,
-    api_key: str,
-    system_prompt: str
-) -> Tuple[str, str, str, Optional[str]]:
-    """Analyze uploaded CSV data using the API and execute the generated code"""
-    if not csv_file:
-        return "No file uploaded.", None, None, None
     try:
-        # Create safe executor
-        executor = SafeExecutor()
         # Read the CSV file
         df = pd.read_csv(csv_file.name)
-        columns = df.columns.tolist()
-        sample_data = df.head(3).to_dict()
         # Build the prompt
-        prompt = f"""Analyze this CSV file with columns: {columns}.
-Sample data: {sample_data}
-Generate Python code that:
-1. Creates insightful visualizations using matplotlib or seaborn
-2. Performs relevant statistical analysis
-3. Identifies key patterns or insights
-4. Properly handles potential data issues
-Important: Use only these libraries: {', '.join(SAFE_IMPORTS)}"""
         # Get code from API
-        generated_code = query_api(prompt, api_url, api_key, system_prompt)
-        # Create execution environment
-        globals_dict = {'df': df, 'pd': pd, 'np': np, 'plt': plt, 'sns': sns}
         # Execute the code
-        vis_path, execution_output = executor.execute_code(generated_code, globals_dict)
-        status = "Analysis completed successfully."
-        return status, generated_code, execution_output, vis_path
     except Exception as e:
-        return f"Error during analysis: {str(e)}", None, None, None
 def create_interface():
-    """Create the Gradio interface"""
     with gr.Blocks() as interface:
-        gr.Markdown("# AI-Powered Data Analysis Tool")
         with gr.Row():
-            with gr.Column():
-                api_url = gr.Textbox(
-                    label="API URL",
-                    placeholder="Enter API endpoint URL",
-                    type="text"
-                )
                 api_key = gr.Textbox(
-                    label="API Key",
-                    placeholder="Enter API key",
-                    type="password"
                 )
                 system_prompt = gr.Textbox(
                     label="System Prompt",
-                    placeholder="Enter system prompt for the AI",
-                    value="You are an AI assistant specialized in data analysis and visualization.",
                     lines=3
                 )
                 csv_file = gr.File(
                     label="Upload CSV File",
                     file_types=[".csv"]
                 )
-                analyze_button = gr.Button("Analyze Data")
-            with gr.Column():
-                status_output = gr.Textbox(label="Status")
-                code_output = gr.Code(
-                    label="Generated Code",
-                    language="python"
-                )
-                execution_output = gr.Textbox(
-                    label="Execution Output",
-                    lines=10
-                )
-                visualization_output = gr.Image(
-                    label="Visualization",
-                    type="filepath"
-                )
-        analyze_button.click(
-            fn=analyze_data,
-            inputs=[csv_file, api_url, api_key, system_prompt],
-            outputs=[status_output, code_output, execution_output, visualization_output]
-        )
         gr.Markdown("""
         ## How to Use
-        1. Enter your API URL and key (supports various API providers)
-        2. Customize the system prompt if desired
-        3. Upload a CSV file for analysis
-        4. Click 'Analyze Data' to generate and execute analysis code
         The tool will:
-        - Generate Python code to analyze your data
-        - Execute the code safely in a controlled environment
-        - Display both textual results and visualizations
-        - Support common data science libraries
         """)
     return interface

 import sys
 from io import StringIO
 import matplotlib.pyplot as plt
+import base64
+from pathlib import Path
def install_package(package_name):
    """Make sure ``package_name`` is importable, installing it with pip if needed.

    Args:
        package_name: Top-level module / distribution name, e.g. ``"seaborn"``.

    Returns:
        True when the module is already importable or pip exits successfully;
        False for an unusable name or a failed installation.
    """
    import importlib.util
    import re
    import subprocess

    # Reject empty names and anything pip could interpret as an option,
    # path or URL (the name ultimately comes from generated code).
    if not isinstance(package_name, str) or not re.fullmatch(
        r"[A-Za-z0-9][A-Za-z0-9._-]*", package_name
    ):
        return False

    # Skip the (slow) pip subprocess when the module can already be imported.
    try:
        if importlib.util.find_spec(package_name) is not None:
            return True
    except (ImportError, ValueError):
        # Lookup itself failed (e.g. missing parent package); fall through
        # and let pip decide.
        pass

    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
    except (subprocess.CalledProcessError, OSError):
        # pip returned non-zero, or the interpreter could not be spawned.
        return False
    return True
def _imported_packages(code):
    """Return the distinct top-level package names imported anywhere in ``code``."""
    import ast

    names = []
    for node in ast.walk(ast.parse(code)):
        if isinstance(node, ast.Import):
            # ``import a.b, c`` -> a, c
            names.extend(alias.name.split('.')[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0:
            # Relative imports (level > 0) never name an installable package.
            names.append(node.module.split('.')[0])
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(names))


def safe_execute_code(code: str, globals_dict=None):
    """Run ``code`` with ``exec`` and collect its printed output and figures.

    Packages imported by the code are passed to ``install_package`` before
    execution. Every open matplotlib figure is saved to its own temporary
    PNG file afterwards.

    Args:
        code: Python source to run.
        globals_dict: Namespace the code runs in; a fresh dict when omitted.

    Returns:
        A ``(success, output, figures)`` tuple. On success ``output`` is the
        captured stdout and ``figures`` a list of PNG file paths. On failure
        ``output`` is an error message and ``figures`` is empty.
    """
    from contextlib import redirect_stdout

    if globals_dict is None:
        globals_dict = {}

    captured = StringIO()
    try:
        # redirect_stdout restores sys.stdout even if the code raises.
        with redirect_stdout(captured):
            for package in _imported_packages(code):
                install_package(package)
            # SECURITY: exec runs the code with this process's full privileges;
            # there is no sandbox here despite the function name.
            exec(code, globals_dict)

        figures = []
        for num in plt.get_fignums():
            fig = plt.figure(num)
            # Reserve a path, then close the handle before matplotlib writes
            # to it by name (an open handle blocks the write on Windows).
            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
                path = tmp.name
            fig.savefig(path)
            figures.append(path)
        return True, captured.getvalue(), figures
    except Exception as e:
        return False, f"Error executing code:\n{str(e)}", []
    finally:
        # Always drop figures so a failed run cannot leak plots into the next.
        plt.close('all')
def query_deepseek(prompt: str, api_key: str, system_prompt: str = None,
                   model: str = "deepseek-reasoner", timeout: float = 300.0):
    """Send a single-turn chat completion request to the DeepSeek API.

    Args:
        prompt: User message content.
        api_key: Bearer token sent in the ``Authorization`` header.
        system_prompt: Optional system message placed before the user message.
        model: Model identifier sent in the payload.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The assistant message content, or a string starting with
        ``"API Error: "`` when the request or response parsing fails.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})
    payload = {
        "model": model,
        "messages": messages,
        "stream": False
    }
    try:
        # Without a timeout requests waits forever on a stalled connection,
        # which would hang the UI worker.
        response = requests.post("https://api.deepseek.com/chat/completions",
                                 headers=headers,
                                 json=payload,
                                 timeout=timeout)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Broad on purpose: covers transport errors as well as malformed
        # responses (bad JSON, missing keys) at this UI boundary.
        return f"API Error: {str(e)}"
def chat_function(message, history, csv_file, api_key, system_prompt):
    """Answer one chat message, optionally with context from the uploaded CSV.

    Args:
        message: The user's message text.
        history: Previous chat turns (not used when building the prompt).
        csv_file: Uploaded file object exposing a ``.name`` path, or a falsy
            value when nothing was uploaded.
        api_key: DeepSeek API key.
        system_prompt: System prompt forwarded to the model.

    Returns:
        The model reply as a string, or a human-readable error string.
    """
    # NOTE(review): the return value is a bare string; confirm the UI wiring
    # converts it to whatever format the Chatbot component expects.
    if not api_key:
        return "Please provide your DeepSeek API key first."

    context = ""
    if csv_file:
        try:
            df = pd.read_csv(csv_file.name)
        except Exception as exc:
            # Report unreadable uploads in the chat instead of raising into
            # the UI, matching how analyze_data reports failures.
            return f"Error reading CSV file: {exc}"
        context = f"\nContext: I have loaded a CSV file with columns: {df.columns.tolist()}\n"
        context += f"First few rows: {df.head(3).to_dict()}\n"

    return query_deepseek(context + message, api_key, system_prompt)
def _extract_python_code(text):
    """Return the Python found in markdown code fences, or ``text`` unchanged."""
    import re

    fenced = re.findall(r"```([A-Za-z0-9_+-]*)[ \t]*\r?\n(.*?)```", text, flags=re.DOTALL)
    # Keep unlabeled and Python-labeled blocks; skip e.g. ```bash snippets.
    python_blocks = [
        body.strip("\n")
        for lang, body in fenced
        if lang.lower() in ("", "python", "py", "python3")
    ]
    if python_blocks:
        return "\n\n".join(python_blocks)
    return text


def analyze_data(csv_file, api_key, system_prompt, code_request):
    """Ask the model for analysis code for the uploaded CSV and execute it.

    Args:
        csv_file: Uploaded file object exposing a ``.name`` path.
        api_key: DeepSeek API key.
        system_prompt: System prompt forwarded to the model.
        code_request: Free-text description of the desired analysis.

    Returns:
        A ``(status, code, output, figures)`` tuple: a status message, the
        generated code (``None`` if none was obtained), the captured output
        (``None`` on failure) and a list of figure file paths.
    """
    if not csv_file:
        return "Please upload a CSV file first.", None, None, []
    if not api_key:
        return "Please provide your DeepSeek API key.", None, None, []
    try:
        import numpy as np

        # Read the CSV file
        df = pd.read_csv(csv_file.name)
        # Build the prompt
        prompt = f"""I have a CSV file with columns: {df.columns.tolist()}.
First few rows: {df.head(3).to_dict()}.
User request: {code_request}
Please generate Python code that:
1. Analyzes the data according to the request
2. Creates relevant visualizations
3. Handles potential errors and edge cases
4. Includes helpful comments"""
        # Get code from API
        response = query_deepseek(prompt, api_key, system_prompt)
        # query_deepseek reports failures in-band; never execute that text.
        if response.startswith("API Error:"):
            return response, None, None, []
        # Chat models usually wrap code in markdown fences, which are not
        # valid Python; run only the fenced code when fences are present.
        generated_code = _extract_python_code(response)
        # Set up execution environment
        globals_dict = {
            'pd': pd,
            'plt': plt,
            'df': df,
            'np': np,
        }
        # Execute the code
        success, execution_output, figures = safe_execute_code(generated_code, globals_dict)
        if not success:
            return f"Execution failed: {execution_output}", generated_code, None, []
        return "Analysis completed successfully.", generated_code, execution_output, figures
    except Exception as e:
        return f"Error during analysis: {str(e)}", None, None, []
 def create_interface():
     """Build the two-tab Gradio UI (chat and code generation).

     The left column holds the inputs shared by both tabs (API key, system
     prompt, CSV upload). The "Chat" tab forwards messages to
     ``chat_function``; the "Code Generation" tab calls ``analyze_data``.

     Returns:
         The assembled ``gr.Blocks`` interface (not yet launched).
     """
     with gr.Blocks() as interface:
         gr.Markdown("# AI Data Analysis Assistant")
         with gr.Row():
             # Sidebar with common inputs
             with gr.Column(scale=1):
                 api_key = gr.Textbox(
                     label="DeepSeek API Key",
                     type="password",
                     placeholder="Enter your API key"
                 )
                 system_prompt = gr.Textbox(
                     label="System Prompt",
                     value="You are an AI assistant specialized in data analysis and Python programming.",
                     lines=3
                 )
                 csv_file = gr.File(
                     label="Upload CSV File",
                     file_types=[".csv"]
                 )
             # Main content area with tabs
             with gr.Column(scale=3):
                 with gr.Tabs():
                     # Chat Interface Tab
                     with gr.TabItem("Chat"):
                         chatbot = gr.Chatbot()
                         msg = gr.Textbox(label="Your Message")
                         clear = gr.Button("Clear Chat")

                         def respond(message, history, uploaded, key, prompt_text):
                             """Append the new exchange to the chat history."""
                             # chat_function returns a bare string, but the
                             # Chatbot output needs the full list of turns.
                             # NOTE(review): assumes the pair-based history
                             # format; confirm against the installed Gradio.
                             reply = chat_function(message, history, uploaded, key, prompt_text)
                             turns = list(history or [])
                             turns.append([message, reply])
                             # Empty string clears the input box after sending.
                             return "", turns

                         msg.submit(
                             respond,
                             [msg, chatbot, csv_file, api_key, system_prompt],
                             [msg, chatbot]
                         )
                         clear.click(lambda: None, None, chatbot, queue=False)
                     # Code Generation Tab
                     with gr.TabItem("Code Generation"):
                         code_request = gr.Textbox(
                             label="What analysis would you like to perform?",
                             placeholder="e.g., Create a correlation matrix and visualize key relationships",
                             lines=3
                         )
                         analyze_button = gr.Button("Generate & Execute Code")
                         with gr.Row():
                             with gr.Column():
                                 status_output = gr.Textbox(label="Status")
                                 code_output = gr.Code(
                                     label="Generated Code",
                                     language="python"
                                 )
                                 execution_output = gr.Textbox(
                                     label="Execution Output",
                                     lines=10
                                 )
                             with gr.Column():
                                 gallery = gr.Gallery(
                                     label="Visualizations",
                                     columns=2,
                                     rows=2,
                                     height="auto"
                                 )
                         # Output order matches analyze_data's return tuple.
                         analyze_button.click(
                             analyze_data,
                             inputs=[csv_file, api_key, system_prompt, code_request],
                             outputs=[status_output, code_output, execution_output, gallery]
                         )
         gr.Markdown("""
         ## How to Use
         1. Enter your DeepSeek API key
         2. Upload a CSV file for analysis
         3. Use either:
            - Chat tab: Have a conversation about your data
            - Code Generation tab: Get executable Python code for specific analyses
         The tool will:
         - Generate and execute Python code
         - Create visualizations
         - Allow interactive exploration of your data
         """)
     return interface