Spaces:

jzou19950715
/

Lossdog_Data_Science_Expert

Running

App Files Files Community

jzou19950715 committed on Jan 21

Commit

069bc6a

verified ·

1 Parent(s): dc45114

Update app.py

Browse files

Files changed (1) hide show

app.py +172 -168

app.py CHANGED Viewed

@@ -8,57 +8,106 @@ import tempfile
 import sys
 from io import StringIO
 import matplotlib.pyplot as plt
-import base64
 from pathlib import Path
-def install_package(package_name):
-    """Dynamically install any Python package"""
-    try:
-        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
-        return True
-    except:
-        return False
-def safe_execute_code(code: str, globals_dict=None):
-    """Execute code safely and capture all outputs"""
-    if globals_dict is None:
-        globals_dict = {}
-    # Redirect stdout to capture print outputs
-    old_stdout = sys.stdout
-    redirected_output = StringIO()
-    sys.stdout = redirected_output
-    try:
-        # First pass: collect and install required imports
-        import_lines = [line for line in code.split('\n') if 'import' in line]
-        for line in import_lines:
-            parts = line.split()
-            if parts[0] == 'import':
-                package = parts[1].split('.')[0]
-                install_package(package)
-            elif parts[0] == 'from':
-                package = parts[1].split('.')[0]
-                install_package(package)
-        # Execute the code
-        exec(code, globals_dict)
-        output = redirected_output.getvalue()
-        # Handle any matplotlib figures
-        figures = []
-        if plt.get_figs():
-            for i, fig in enumerate(plt.get_figs()):
-                with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
-                    fig.savefig(tmp.name)
-                    figures.append(tmp.name)
-            plt.close('all')
-        return True, output, figures
-    except Exception as e:
-        return False, f"Error executing code:\n{str(e)}", []
-    finally:
-        sys.stdout = old_stdout
 def query_deepseek(prompt: str, api_key: str, system_prompt: str = None):
     """Send a prompt to DeepSeek API"""
@@ -87,74 +136,56 @@ def query_deepseek(prompt: str, api_key: str, system_prompt: str = None):
     except Exception as e:
         return f"API Error: {str(e)}"
-def chat_function(message, history, csv_file, api_key, system_prompt):
-    """Handle chat interactions"""
-    if not api_key:
-        return "Please provide your DeepSeek API key first."
-    context = ""
-    if csv_file:
-        df = pd.read_csv(csv_file.name)
-        context = f"\nContext: I have loaded a CSV file with columns: {df.columns.tolist()}\n"
-        context += f"First few rows: {df.head(3).to_dict()}\n"
-    full_prompt = context + message
-    response = query_deepseek(full_prompt, api_key, system_prompt)
-    return response
-def analyze_data(csv_file, api_key, system_prompt, code_request):
-    """Generate and execute code for data analysis"""
-    if not csv_file:
-        return "Please upload a CSV file first.", None, None, []
-    if not api_key:
-        return "Please provide your DeepSeek API key.", None, None, []
-    try:
-        # Read the CSV file
-        df = pd.read_csv(csv_file.name)
-        # Build the prompt
-        prompt = f"""I have a CSV file with columns: {df.columns.tolist()}.
-First few rows: {df.head(3).to_dict()}.
-User request: {code_request}
-Please generate Python code that:
-1. Analyzes the data according to the request
-2. Creates relevant visualizations
-3. Handles potential errors and edge cases
-4. Includes helpful comments"""
-        # Get code from API
-        generated_code = query_deepseek(prompt, api_key, system_prompt)
-        # Set up execution environment
-        globals_dict = {
-            'pd': pd,
-            'plt': plt,
-            'df': df,
-            'np': __import__('numpy')
-        }
-        # Execute the code
-        success, execution_output, figures = safe_execute_code(generated_code, globals_dict)
-        if not success:
-            return f"Execution failed: {execution_output}", generated_code, None, []
-        return "Analysis completed successfully.", generated_code, execution_output, figures
-    except Exception as e:
-        return f"Error during analysis: {str(e)}", None, None, []
 def create_interface():
-    """Create the dual-channel Gradio interface"""
-    with gr.Blocks() as interface:
-        gr.Markdown("# AI Data Analysis Assistant")
         with gr.Row():
-            # Sidebar with common inputs
             with gr.Column(scale=1):
                 api_key = gr.Textbox(
                     label="DeepSeek API Key",
@@ -163,7 +194,7 @@ def create_interface():
                 )
                 system_prompt = gr.Textbox(
                     label="System Prompt",
-                    value="You are an AI assistant specialized in data analysis and Python programming.",
                     lines=3
                 )
                 csv_file = gr.File(
@@ -171,72 +202,45 @@ def create_interface():
                     file_types=[".csv"]
                 )
-            # Main content area with tabs
             with gr.Column(scale=3):
-                with gr.Tabs():
-                    # Chat Interface Tab
-                    with gr.TabItem("Chat"):
-                        chatbot = gr.Chatbot()
-                        msg = gr.Textbox(label="Your Message")
-                        clear = gr.Button("Clear Chat")
-                        msg.submit(
-                            chat_function,
-                            [msg, chatbot, csv_file, api_key, system_prompt],
-                            chatbot
-                        )
-                        clear.click(lambda: None, None, chatbot, queue=False)
-                    # Code Generation Tab
-                    with gr.TabItem("Code Generation"):
-                        code_request = gr.Textbox(
-                            label="What analysis would you like to perform?",
-                            placeholder="e.g., Create a correlation matrix and visualize key relationships",
-                            lines=3
-                        )
-                        analyze_button = gr.Button("Generate & Execute Code")
-                        with gr.Row():
-                            with gr.Column():
-                                status_output = gr.Textbox(label="Status")
-                                code_output = gr.Code(
-                                    label="Generated Code",
-                                    language="python"
-                                )
-                                execution_output = gr.Textbox(
-                                    label="Execution Output",
-                                    lines=10
-                                )
-                            with gr.Column():
-                                gallery = gr.Gallery(
-                                    label="Visualizations",
-                                    columns=2,
-                                    rows=2,
-                                    height="auto"
-                                )
-                        analyze_button.click(
-                            analyze_data,
-                            inputs=[csv_file, api_key, system_prompt, code_request],
-                            outputs=[status_output, code_output, execution_output, gallery]
-                        )
         gr.Markdown("""
         ## How to Use
         1. Enter your DeepSeek API key
         2. Upload a CSV file for analysis
-        3. Use either:
-           - Chat tab: Have a conversation about your data
-           - Code Generation tab: Get executable Python code for specific analyses
-        The tool will:
-        - Generate and execute Python code
-        - Create visualizations
-        - Allow interactive exploration of your data
         """)
-    return interface
 if __name__ == "__main__":
-    interface = create_interface()
-    interface.launch()

 import sys
 from io import StringIO
 import matplotlib.pyplot as plt
+import re
 from pathlib import Path
+import importlib
+class CodeExecutionEnvironment:
+    """Run model-generated Python snippets in one shared, persistent namespace.
+
+    All snippets share ``globals_dict``, so names defined by one snippet are
+    visible to the next. Figures left open by a snippet are saved as PNG files
+    under ``figures_dir``.
+
+    SECURITY NOTE(review): snippets are passed to ``exec`` without any
+    sandboxing, and modules they import may be pip-installed on the fly.
+    Both inputs come from an LLM response, i.e. untrusted text. Only deploy
+    this where arbitrary code execution is acceptable.
+    """
+
+    # Fenced ```python blocks; DOTALL lets a block span several lines.
+    _CODE_BLOCK_RE = re.compile(r'```python(.*?)```', re.DOTALL)
+
+    def __init__(self):
+        self.globals_dict = {}
+        self.figures_dir = "temp_figures"
+        # Monotonic counter: figure files from successive runs never collide.
+        # NOTE(review): files accumulate in figures_dir; nothing prunes them.
+        self._figure_count = 0
+        os.makedirs(self.figures_dir, exist_ok=True)
+
+    def install_package(self, package_name):
+        """Pip-install ``package_name`` into the running interpreter.
+
+        Returns:
+            True when pip exits successfully, False otherwise. pip's own
+            output is discarded.
+        """
+        try:
+            subprocess.check_call(
+                [sys.executable, "-m", "pip", "install", package_name],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+        except (subprocess.SubprocessError, OSError, ValueError):
+            # Best effort: a failed install surfaces later as an ImportError
+            # when the snippet itself runs.
+            return False
+        return True
+
+    @staticmethod
+    def _top_level_imports(code):
+        """Return ``(package, is_plain_import)`` pairs for imports in ``code``.
+
+        ``package`` is the top-level module name. Relative imports are skipped.
+        Unparseable code yields an empty list; ``exec`` reports the error.
+        """
+        import ast
+
+        try:
+            tree = ast.parse(code)
+        except (SyntaxError, ValueError):
+            return []
+        found = []
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Import):
+                found.extend((alias.name.split('.')[0], True) for alias in node.names)
+            elif isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
+                found.append((node.module.split('.')[0], False))
+        return found
+
+    @staticmethod
+    def _is_importable(name):
+        """Return True when ``name`` is already loaded or can be found on the path."""
+        import importlib.util
+
+        if name in sys.modules:
+            return True
+        try:
+            return importlib.util.find_spec(name) is not None
+        except (ImportError, ValueError):
+            return False
+
+    def extract_and_execute_code(self, text):
+        """Execute every ```python block in ``text`` and append its output.
+
+        Returns:
+            ``(modified_text, combined_output, figure_paths)``. When ``text``
+            has no python block the result is ``(text, None, [])``. Otherwise
+            ``modified_text`` is ``text`` with an ``Output:`` fence inserted
+            after each code block, ``combined_output`` joins the per-block
+            outputs with newlines, and ``figure_paths`` lists the PNG files
+            produced by the blocks that ran successfully.
+        """
+        code_blocks = self._CODE_BLOCK_RE.findall(text)
+        if not code_blocks:
+            return text, None, []
+        all_outputs = []
+        all_figures = []
+        for code in code_blocks:
+            success, output, figures = self.execute_code(code.strip())
+            if success:
+                all_outputs.append(output)
+                all_figures.extend(figures)
+            else:
+                all_outputs.append(f"Error: {output}")
+        # Substitute match by match so each block gets its own output exactly
+        # once, even when two blocks contain identical code.
+        remaining = iter(all_outputs)
+
+        def _with_output(match):
+            return f"{match.group(0)}\nOutput:\n```\n{next(remaining)}\n```"
+
+        modified_text = self._CODE_BLOCK_RE.sub(_with_output, text)
+        return modified_text, "\n".join(all_outputs), all_figures
+
+    def execute_code(self, code):
+        """Execute ``code`` in the shared namespace and capture its results.
+
+        Modules the snippet imports that cannot be found are pip-installed
+        first. ``pd``, ``plt`` and ``np`` are seeded into the namespace when
+        absent.
+
+        Returns:
+            ``(True, stdout_text, figure_paths)`` on success, or
+            ``(False, error_message, [])`` when anything raises ``Exception``.
+        """
+        # Redirect stdout to capture prints
+        old_stdout = sys.stdout
+        redirected_output = StringIO()
+        sys.stdout = redirected_output
+        try:
+            # First pass: install whatever the snippet imports but is missing.
+            # NOTE(review): the import name is used as the pip name, which is
+            # wrong for some projects (e.g. sklearn vs scikit-learn).
+            for package, is_plain_import in self._top_level_imports(code):
+                if self._is_importable(package):
+                    continue
+                if not self.install_package(package):
+                    continue
+                # Let the import system see files created by the install.
+                importlib.invalidate_caches()
+                if is_plain_import:
+                    try:
+                        self.globals_dict[package] = importlib.import_module(package)
+                    except Exception:
+                        # Best effort; the snippet's own import reports it.
+                        pass
+            # Add common data science packages to globals
+            self.globals_dict.setdefault('pd', pd)
+            self.globals_dict.setdefault('plt', plt)
+            if 'np' not in self.globals_dict:
+                import numpy as np
+                self.globals_dict['np'] = np
+            # Execute the code
+            exec(code, self.globals_dict)
+            output = redirected_output.getvalue()
+            # Capture every figure the snippet left open.
+            figures = []
+            for num in plt.get_fignums():
+                self._figure_count += 1
+                fig_path = os.path.join(self.figures_dir, f"figure_{self._figure_count}.png")
+                plt.figure(num).savefig(fig_path)
+                figures.append(fig_path)
+            return True, output, figures
+        except Exception as e:
+            return False, str(e), []
+        finally:
+            sys.stdout = old_stdout
+            # Close on every path so a failed snippet's figures do not leak
+            # into the next run's gallery.
+            plt.close('all')
 def query_deepseek(prompt: str, api_key: str, system_prompt: str = None):
     """Send a prompt to DeepSeek API"""
     except Exception as e:
         return f"API Error: {str(e)}"
+class ChatAndCodeInterface:
+    """Chat handler that forwards messages to DeepSeek and runs returned code.
+
+    Holds one ``CodeExecutionEnvironment`` and the most recently loaded CSV as
+    a DataFrame, which is exposed to executed code under the name ``df``.
+
+    NOTE(review): the state lives on the instance, so everything sharing this
+    instance shares the dataframe and the execution namespace.
+    """
+
+    def __init__(self):
+        self.env = CodeExecutionEnvironment()
+        self.current_df = None
+        # Path of the CSV behind current_df; tracked here instead of being
+        # attached to the DataFrame as an ad-hoc attribute.
+        self.current_filename = None
+
+    def process_message(self, message, history, csv_file, api_key, system_prompt):
+        """Process a chat message with code execution capabilities.
+
+        Args:
+            message: The user's chat text.
+            history: Existing chat history as a list of ``[user, bot]`` pairs;
+                ``None`` is treated as empty.
+            csv_file: Uploaded file (object with ``.name`` or a plain path),
+                or a falsy value when nothing is uploaded.
+            api_key: DeepSeek API key; a falsy value short-circuits with a
+                reminder message.
+            system_prompt: System prompt forwarded to ``query_deepseek``.
+
+        Returns:
+            ``(history, figures)``: the history extended by one
+            ``[message, reply]`` pair, and the list of figure paths produced
+            by executed code (``None`` when no request was made).
+        """
+        history = list(history or [])
+        if not api_key:
+            return history + [[message, "Please provide your DeepSeek API key first."]], None
+        # Update dataframe if new CSV uploaded
+        if csv_file:
+            # Accept both a file-like upload object and a bare path string.
+            csv_path = getattr(csv_file, "name", csv_file)
+            if self.current_df is None or csv_path != self.current_filename:
+                try:
+                    frame = pd.read_csv(csv_path)
+                except (OSError, ValueError) as exc:
+                    # pandas parse/empty-data/decoding errors are ValueErrors.
+                    return history + [[message, f"Could not read the uploaded CSV file: {exc}"]], None
+                self.current_df = frame
+                self.current_filename = csv_path
+                self.env.globals_dict['df'] = frame
+        # Build context
+        context = ""
+        if self.current_df is not None:
+            context = (f"\nContext: Working with CSV file containing columns: {self.current_df.columns.tolist()}\n"
+                      f"First few rows: {self.current_df.head(3).to_dict()}\n"
+                      f"The dataframe is available as 'df' in the code environment.\n")
+        # Get AI response
+        full_prompt = (
+            context +
+            "The user might ask you to analyze data or generate visualizations. "
+            "When you write code, wrap it in ```python``` blocks. "
+            "You can use any Python library - they will be automatically installed. "
+            "\nUser message: " + message
+        )
+        response = query_deepseek(full_prompt, api_key, system_prompt)
+        # Execute any code in the response; the combined output is already
+        # embedded in modified_response, so it is not used separately.
+        modified_response, _outputs, figures = self.env.extract_and_execute_code(response)
+        # Update chat history
+        history = history + [[message, modified_response]]
+        return history, figures
 def create_interface():
+    """Create the unified chat and code execution interface"""
+    interface = ChatAndCodeInterface()
+    with gr.Blocks() as demo:
+        gr.Markdown("# AI Data Analysis Assistant with Code Execution")
         with gr.Row():
             with gr.Column(scale=1):
                 api_key = gr.Textbox(
                     label="DeepSeek API Key",
                 )
                 system_prompt = gr.Textbox(
                     label="System Prompt",
+                    value="You are an AI assistant specialized in data analysis and Python programming. When asked to analyze data or create visualizations, you provide executable Python code.",
                     lines=3
                 )
                 csv_file = gr.File(
                     file_types=[".csv"]
                 )
             with gr.Column(scale=3):
+                chatbot = gr.Chatbot(height=400)
+                gallery = gr.Gallery(label="Generated Visualizations", columns=2, height=300)
+                with gr.Row():
+                    msg = gr.Textbox(
+                        label="Your Message",
+                        placeholder="Ask me to analyze your data or create visualizations...",
+                        scale=9
+                    )
+                    clear = gr.Button("Clear", scale=1)
+        msg.submit(
+            interface.process_message,
+            [msg, chatbot, csv_file, api_key, system_prompt],
+            [chatbot, gallery]
+        )
+        clear.click(lambda: ([], []), None, [chatbot, gallery], queue=False)
         gr.Markdown("""
         ## How to Use
         1. Enter your DeepSeek API key
         2. Upload a CSV file for analysis
+        3. Chat naturally about your data analysis needs
+        Example prompts:
+        - "Create a histogram of the numerical columns"
+        - "Analyze the correlation between variables"
+        - "Generate summary statistics and visualize key trends"
+        The assistant will:
+        - Generate and execute Python code automatically
+        - Show both code and its output in the chat
+        - Display generated visualizations in the gallery
         """)
+    return demo
 if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch()