Spaces:

jzou19950715
/

Lossdog_Data_Science_Expert

Build error

App Files Files Community

jzou19950715 committed on Jan 21

Commit

a5d47f2

verified ·

1 Parent(s): c94ceb7

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -196

app.py CHANGED Viewed

@@ -8,254 +8,168 @@ import tempfile
 import sys
 from io import StringIO
 import matplotlib.pyplot as plt
-import re
 from pathlib import Path
 import importlib
class CodeExecutionEnvironment:
    """Execute model-generated Python snippets in one persistent namespace.

    All snippets share ``globals_dict``, so variables defined by one snippet
    are visible to the next.  Printed output is captured, Matplotlib and
    Plotly figures are exported as image files under ``figures_dir``, and
    packages named in import statements are installed on demand.

    NOTE(review): ``exec`` runs the code with the full privileges of this
    process and package names taken from model output are passed to
    ``pip install``; nothing here is sandboxed.
    """

    # Fenced ```python blocks; DOTALL lets a block span several lines.
    _CODE_BLOCK = re.compile(r'```python(.*?)```', re.DOTALL)

    def __init__(self):
        """Create an empty namespace and make sure the figure directory exists."""
        self.globals_dict = {}
        self.figures_dir = "temp_figures"
        os.makedirs(self.figures_dir, exist_ok=True)

    def install_package(self, package_name):
        """Dynamically install a Python package with pip.

        Returns:
            True when pip exits successfully, False when pip fails or cannot
            be started.
        """
        try:
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", package_name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except (subprocess.CalledProcessError, OSError):
            return False
        return True

    @staticmethod
    def _required_packages(code):
        """Return the top-level packages imported by *code*, in first-seen order.

        The code is parsed rather than scanned for the substring "import", so
        strings and comments containing that word are ignored.  Unparsable
        code yields an empty list; ``exec`` reports the syntax error later.
        """
        import ast  # local import: not among this module's top-level imports

        try:
            tree = ast.parse(code)
        except (SyntaxError, ValueError):
            return []
        packages = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                names = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom) and node.module and not node.level:
                # Relative imports (level > 0) cannot be installed from PyPI.
                names = [node.module]
            else:
                continue
            for name in names:
                root = name.split('.')[0]
                if root not in packages:
                    packages.append(root)
        return packages

    def _ensure_package(self, package):
        """Make *package* importable, calling pip only when the import fails."""
        try:
            importlib.import_module(package)
            return True
        except ImportError:
            pass
        if not self.install_package(package):
            return False
        # Let the import system notice the freshly installed distribution.
        importlib.invalidate_caches()
        try:
            importlib.import_module(package)
        except ImportError:
            return False
        return True

    def execute_code(self, code):
        """Execute code in the managed environment.

        Returns:
            ``(True, captured_stdout, figure_paths)`` on success, or
            ``(False, error_message, [])`` when anything raises.
        """
        # Redirect stdout to capture prints; the previous stream is restored
        # in ``finally`` whatever happens.
        old_stdout = sys.stdout
        redirected_output = StringIO()
        sys.stdout = redirected_output
        try:
            # Best effort: a package that cannot be provided simply makes the
            # snippet's own import statement fail with a clear message.
            for package in self._required_packages(code):
                self._ensure_package(package)

            # Add common data science packages to globals
            if 'pd' not in self.globals_dict:
                self.globals_dict['pd'] = pd
            if 'plt' not in self.globals_dict:
                self.globals_dict['plt'] = plt
            if 'np' not in self.globals_dict:
                import numpy as np
                self.globals_dict['np'] = np

            # Execute the code
            exec(code, self.globals_dict)
            output = redirected_output.getvalue()

            figures = []
            # Matplotlib: pyplot exposes open figures through get_fignums();
            # there is no ``get_figs`` function.
            for num in plt.get_fignums():
                fig_path = os.path.join(self.figures_dir, f"mpl_figure_{len(figures)}.png")
                plt.figure(num).savefig(fig_path)
                figures.append(fig_path)
            plt.close('all')

            # Plotly: by convention the snippet leaves its figure in ``fig``.
            fig = self.globals_dict.get('fig')
            if fig is not None and 'plotly' in str(type(fig)):
                fig_path = os.path.join(self.figures_dir, f"plotly_figure_{len(figures)}.html")
                fig.write_html(fig_path)
                # Also save as image for gallery display
                # NOTE(review): static export presumably needs an extra
                # Plotly image backend to be installed - confirm.
                img_path = os.path.join(self.figures_dir, f"plotly_figure_{len(figures)}.png")
                fig.write_image(img_path)
                figures.append(img_path)
            return True, output, figures
        except Exception as e:
            return False, str(e), []
        finally:
            sys.stdout = old_stdout

    def extract_and_execute_code(self, text):
        """Extract code blocks from markdown and execute them.

        Returns:
            ``(annotated_text, joined_outputs, figure_paths)``.  When *text*
            contains no ```python block the result is ``(text, None, [])``.
        """
        code_blocks = self._CODE_BLOCK.findall(text)
        if not code_blocks:
            return text, None, []

        all_outputs = []
        all_figures = []
        for code in code_blocks:
            success, output, figures = self.execute_code(code.strip())
            if success:
                all_outputs.append(output)
                all_figures.extend(figures)
            else:
                all_outputs.append(f"Error: {output}")

        # Annotate in a single pass over the ORIGINAL text so that every
        # block receives exactly its own output, even when two blocks are
        # textually identical (str.replace would annotate them repeatedly).
        pending = iter(all_outputs)

        def annotate(match):
            return f"{match.group(0)}\nOutput:\n```\n{next(pending)}\n```"

        modified_text = self._CODE_BLOCK.sub(annotate, text)
        return modified_text, "\n".join(all_outputs), all_figures
def query_deepseek(prompt: str, api_key: str, system_prompt: str = None,
                   timeout: float = 120.0) -> str:
    """Send a prompt to the DeepSeek chat-completions API.

    Args:
        prompt: Text sent as the user message.
        api_key: Bearer token placed in the ``Authorization`` header.
        system_prompt: Optional system message placed before the user message.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` waits indefinitely, which would
            freeze the chat handler on a stalled connection.

    Returns:
        The content of the first choice's message, or a string starting with
        ``"API Error: "`` when the request fails or the response does not
        have the expected shape.  This function does not raise for those
        failures.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})
    payload = {
        "model": "deepseek-reasoner",
        "messages": messages,
        "stream": False
    }
    try:
        response = requests.post(
            "https://api.deepseek.com/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        # RequestException: connection, timeout and HTTP status errors;
        # the remaining types cover a body that is not the expected JSON.
        return f"API Error: {str(e)}"
class ChatAndCodeInterface:
    """Chat handler that forwards messages to DeepSeek and runs returned code.

    Holds one ``CodeExecutionEnvironment`` and the most recently uploaded
    DataFrame, which is exposed to executed code as ``df``.
    """

    def __init__(self):
        """Create the execution environment; no CSV is loaded yet."""
        self.env = CodeExecutionEnvironment()
        self.current_df = None
        # Name of the upload behind ``current_df``; kept here rather than as
        # an ad-hoc private attribute on the DataFrame itself.
        self._current_csv_name = None

    def process_message(self, message, history, csv_file, api_key, system_prompt):
        """Process a chat message with code execution capabilities.

        Args:
            message: The user's text.
            history: Chat history as a list of ``[user, assistant]`` pairs;
                ``None`` is treated as an empty history.
            csv_file: Uploaded file object exposing ``.name`` (a path), or a
                falsy value when nothing was uploaded.
            api_key: DeepSeek API key; when empty a reminder is returned and
                no request is made.
            system_prompt: System prompt forwarded to the API.

        Returns:
            ``(new_history, figure_paths)``; ``figure_paths`` is ``None``
            when no code was executed because of a missing key or an
            unreadable CSV.
        """
        history = history or []
        if not api_key:
            return history + [[message, "Please provide your DeepSeek API key first."]], None

        # Update dataframe if new CSV uploaded
        if csv_file and (self.current_df is None or csv_file.name != self._current_csv_name):
            try:
                self.current_df = pd.read_csv(csv_file.name)
            except (OSError, ValueError) as e:
                # pandas parser/empty-data/decoding errors derive from
                # ValueError; report them in the chat instead of crashing.
                return history + [[message, f"Could not read the uploaded CSV file: {e}"]], None
            self._current_csv_name = csv_file.name
            self.env.globals_dict['df'] = self.current_df

        # Build context
        context = ""
        if self.current_df is not None:
            context = (f"\nContext: Working with CSV file containing columns: {self.current_df.columns.tolist()}\n"
                      f"First few rows: {self.current_df.head(3).to_dict()}\n"
                      f"The dataframe is available as 'df' in the code environment.\n")

        # Get AI response
        full_prompt = (
            context +
            "The user might ask you to analyze data or generate visualizations. "
            "When you write code, wrap it in ```python``` blocks. "
            "You can use any Python library - they will be automatically installed. "
            "For interactive maps, use plotly.express and ensure you install required dependencies. "
            "\nUser message: " + message
        )
        response = query_deepseek(full_prompt, api_key, system_prompt)

        # Execute any code in the response
        modified_response, outputs, figures = self.env.extract_and_execute_code(response)

        # Update chat history
        history = history + [[message, modified_response]]
        return history, figures
def create_interface():
    """Build the Gradio Blocks app that combines chat with code execution.

    Returns the ``gr.Blocks`` object; the caller is responsible for
    launching it.
    """
    backend = ChatAndCodeInterface()

    default_system_prompt = "You are an AI assistant specialized in data analysis and Python programming. When asked to analyze data or create visualizations, you provide executable Python code."
    usage_notes = """
        ## How to Use
        1. Enter your DeepSeek API key
        2. Upload a CSV file for analysis
        3. Chat naturally about your data analysis needs
        Example prompts:
        - "Create a histogram of the numerical columns"
        - "Generate an interactive map of the locations"
        - "Show the correlation between variables"
        - "Create a summary dashboard of key metrics"
        The assistant will:
        - Generate and execute Python code automatically
        - Handle both static and interactive visualizations
        - Show code, output, and visualizations in one place
        """

    def reset_outputs():
        # Empty chat history and empty gallery.
        return [], []

    with gr.Blocks() as demo:
        gr.Markdown("# AI Data Analysis Assistant with Code Execution")

        with gr.Row():
            # Narrow left column: credentials, prompt and data upload.
            with gr.Column(scale=1):
                key_box = gr.Textbox(
                    label="DeepSeek API Key",
                    type="password",
                    placeholder="Enter your API key",
                )
                prompt_box = gr.Textbox(
                    label="System Prompt",
                    value=default_system_prompt,
                    lines=3,
                )
                upload = gr.File(label="Upload CSV File", file_types=[".csv"])

            # Wide right column: conversation, figures and the input row.
            with gr.Column(scale=3):
                chat = gr.Chatbot(height=500)
                figure_gallery = gr.Gallery(
                    label="Generated Visualizations",
                    columns=2,
                    height=400,
                    object_fit="contain",
                )
                with gr.Row():
                    message_box = gr.Textbox(
                        label="Your Message",
                        placeholder="Ask me to analyze your data or create visualizations...",
                        scale=9,
                    )
                    reset_button = gr.Button("Clear", scale=1)

        handler_inputs = [message_box, chat, upload, key_box, prompt_box]
        handler_outputs = [chat, figure_gallery]
        message_box.submit(backend.process_message, handler_inputs, handler_outputs)
        reset_button.click(reset_outputs, None, [chat, figure_gallery], queue=False)

        gr.Markdown(usage_notes)

    return demo

 import sys
 from io import StringIO
 import matplotlib.pyplot as plt
 from pathlib import Path
 import importlib
+import ast
class AICodeEnvironment:
    """Environment in which AI-generated code is executed.

    Code runs through ``exec`` in one persistent namespace
    (``globals_dict``), so names defined by one snippet remain available to
    the next.  Printed output is captured and Matplotlib/Plotly figures are
    written to ``temp_dir``.

    NOTE(review): nothing here sandboxes the code.  It runs with the full
    privileges of this process, and package names taken from the code are
    passed to ``pip install``.
    """

    def __init__(self):
        """Create the namespace, the output directory and the base imports."""
        self.globals_dict = {}
        self.temp_dir = "temp_outputs"
        # Running index for exported figures so that later executions do not
        # overwrite files produced by earlier ones.
        self._figure_count = 0
        os.makedirs(self.temp_dir, exist_ok=True)
        self.setup_base_environment()

    def setup_base_environment(self):
        """Set up the base environment with commonly used packages"""
        self.globals_dict.update({
            'pd': pd,
            'plt': plt,
            '__builtins__': __builtins__,
            'print': print
        })

    def dynamic_import(self, package_name):
        """Import *package_name* into the namespace, installing it if needed.

        pip is only invoked when a plain import fails, so standard-library
        and already-installed packages are never sent to ``pip install``.

        Returns:
            True when the module was imported and stored in ``globals_dict``
            under *package_name*; False otherwise (the reason is printed).
        """
        try:
            try:
                module = importlib.import_module(package_name)
            except ImportError:
                # Install package if not present, then retry the import.
                subprocess.check_call(
                    [sys.executable, "-m", "pip", "install", "--quiet", package_name]
                )
                importlib.invalidate_caches()
                module = importlib.import_module(package_name)
        except Exception as e:
            # Best effort by design: report and let the snippet's own import
            # statement surface the failure.
            print(f"Failed to import {package_name}: {str(e)}")
            return False
        self.globals_dict[package_name] = module
        return True

    def handle_imports(self, code):
        """Make every top-level package imported by *code* available.

        For ``import a.b`` the package is ``a``; for ``from a.b import c`` it
        is also ``a`` (the module being imported from, not the member ``c``).
        Relative imports are skipped because they cannot be installed.

        Returns:
            False when *code* cannot be parsed, True otherwise.
        """
        try:
            tree = ast.parse(code)
        except (SyntaxError, ValueError):
            return False
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                module_names = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom):
                if node.level or not node.module:
                    continue
                module_names = [node.module]
            else:
                continue
            for module_name in module_names:
                package = module_name.split('.')[0]
                if package not in self.globals_dict:
                    self.dynamic_import(package)
        return True

    def _next_figure_stem(self):
        """Return a fresh ``temp_dir/figure_<n>`` path without extension."""
        index = self._figure_count
        self._figure_count = index + 1
        return os.path.join(self.temp_dir, f"figure_{index}")

    def _export_matplotlib_figures(self):
        """Save every open Matplotlib figure as PNG and close them all."""
        # If pyplot was never imported no figure can exist; looking it up in
        # sys.modules also avoids depending on what the snippet did to 'plt'.
        pyplot = sys.modules.get('matplotlib.pyplot')
        if pyplot is None:
            return []
        saved = []
        # pyplot has no ``get_figs``; open figures are listed by number.
        for num in pyplot.get_fignums():
            fig_path = self._next_figure_stem() + ".png"
            pyplot.figure(num).savefig(fig_path)
            saved.append(fig_path)
        pyplot.close('all')
        return saved

    def execute_code(self, code):
        """Execute code and capture all outputs.

        Returns:
            ``(True, captured_stdout, figure_paths)`` on success, or
            ``(False, error_message, [])`` when anything raises.
        """
        # Capture prints; remember the stream that was active so it can be
        # restored exactly (it is not necessarily ``sys.__stdout__``).
        output_buffer = StringIO()
        previous_stdout = sys.stdout
        sys.stdout = output_buffer
        try:
            # Handle imports first
            self.handle_imports(code)
            # Execute the code
            exec(code, self.globals_dict)
            # Capture terminal output
            text_output = output_buffer.getvalue()

            # Handle figures
            figures = self._export_matplotlib_figures()

            # Check for other visualization libraries: by convention the
            # snippet leaves a Plotly figure in the variable ``fig``.
            fig = self.globals_dict.get('fig')
            if fig is not None and 'plotly.graph_objs' in str(type(fig)):
                stem = self._next_figure_stem()
                fig.write_html(stem + ".html")
                # Also save static image
                # NOTE(review): static export presumably needs an extra
                # Plotly image backend to be installed - confirm.
                img_path = stem + ".png"
                fig.write_image(img_path)
                figures.append(img_path)
            return True, text_output, figures
        except Exception as e:
            return False, str(e), []
        finally:
            sys.stdout = previous_stdout
def create_interface():
    """Create the Gradio interface for AI code execution.

    Returns the ``gr.Blocks`` app; the caller is responsible for launching
    it.  One ``AICodeEnvironment`` is created here and shared by every
    message handled through the returned app.
    """
    env = AICodeEnvironment()

    def run_code_blocks(response):
        """Execute each ```python block in *response* and append its output.

        The response is rebuilt segment by segment, so every block gets
        exactly one "Output:" section even when two blocks contain identical
        code.  Returns ``(annotated_response, figure_paths)``.
        """
        # Everything before the first opening fence is plain text.
        head, *blocks = response.split("```python")
        pieces = [head]
        figures = []
        for block in blocks:
            code, fence, tail = block.partition("```")
            success, output, new_figures = env.execute_code(code.strip())
            figures.extend(new_figures)
            result = output if success else f"Error: {output}"
            # A block the model never closed still gets a closing fence so
            # the output section is not swallowed by the code block.
            closing = fence or "\n```"
            pieces.append(f"```python{code}{closing}\nOutput:\n{result}{tail}")
        return "".join(pieces), figures

    def process_message(message, history, csv_file, api_key):
        """Process message and execute any code blocks.

        Returns ``(new_history, figure_paths)``; ``figure_paths`` is ``None``
        when nothing was executed.
        """
        history = history or []
        if not api_key:
            return history + [[message, "Please provide your API key."]], None

        # Update environment with dataframe if CSV uploaded
        if csv_file:
            try:
                env.globals_dict['df'] = pd.read_csv(csv_file.name)
            except (OSError, ValueError) as e:
                # pandas parser/empty-data/decoding errors derive from
                # ValueError; report them in the chat instead of crashing.
                return history + [[message, f"Could not read the uploaded CSV file: {e}"]], None

        # Get response from AI (example structure)
        # NOTE(review): ``query_ai`` is not defined in the visible part of
        # app.py - confirm it exists, otherwise this raises NameError on the
        # first message.
        response = query_ai(message, api_key)

        # Extract and execute code blocks, then format response with outputs
        modified_response, figures = run_code_blocks(response)
        return history + [[message, modified_response]], figures

    # Create Gradio interface
    with gr.Blocks() as demo:
        gr.Markdown("# AI Code Execution Environment")
        with gr.Row():
            with gr.Column(scale=1):
                api_key = gr.Textbox(
                    label="API Key",
                    type="password"
                )
                csv_file = gr.File(
                    label="Upload CSV",
                    file_types=[".csv"]
                )
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(height=500)
                gallery = gr.Gallery(label="Outputs")
                with gr.Row():
                    msg = gr.Textbox(
                        label="Message",
                        placeholder="Ask me to analyze your data..."
                    )
                    clear = gr.Button("Clear")
        msg.submit(
            process_message,
            [msg, chatbot, csv_file, api_key],
            [chatbot, gallery]
        )
        clear.click(lambda: ([], []), None, [chatbot, gallery])
    return demo