jzou19950715 committed on
Commit
7cdbd20
·
verified ·
1 Parent(s): 40864e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -149
app.py CHANGED
@@ -8,207 +8,231 @@ import tempfile
8
  import sys
9
  from io import StringIO
10
  import matplotlib.pyplot as plt
11
- import seaborn as sns
12
- import numpy as np
13
- from typing import Dict, Any, Tuple, Optional
14
- import ast
15
-
16
# Allow-list of top-level modules that generated code is permitted to import
# (modelled on the smolagents approach).
SAFE_IMPORTS = [
    # data handling and plotting
    "pandas",
    "numpy",
    "matplotlib",
    "seaborn",
    "sklearn",
    # scientific / statistical libraries
    "scipy",
    "statsmodels",
    "plotly",
    "math",
    "datetime",
    # standard-library helpers
    "collections",
    "itertools",
    "functools",
    "operator",
]
22
-
23
class SafeExecutor:
    """Run generated Python code with an import allow-list and captured stdout.

    The allow-list is enforced only by a static scan of ``import`` statements.
    NOTE(security): the code is still run through ``exec`` with the normal
    builtins available, so dynamic imports such as ``__import__("os")`` are
    not blocked. This class is not a sandbox for untrusted code.
    """

    def __init__(self, allowed_imports=None):
        """Store the allow-list, falling back to ``SAFE_IMPORTS`` when none is given."""
        self.allowed_imports = allowed_imports or SAFE_IMPORTS

    def validate_imports(self, code: str) -> bool:
        """Check that every import statement in *code* targets an allowed module.

        Args:
            code: Python source to inspect.

        Returns:
            True when all imports are allowed.

        Raises:
            ValueError: if the code cannot be parsed, or if it imports a module
                whose top-level package is not in ``self.allowed_imports``.
        """
        try:
            tree = ast.parse(code)
        except SyntaxError as e:
            raise ValueError(f"Code validation error: {str(e)}") from e

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                # "import a.b, c" -> the aliases carry the module names.
                targets = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom):
                # "from a.b import c" -> the module being imported is a.b, not
                # c. Relative imports (level > 0) are kept with their leading
                # dots so they can never match an allowed name.
                targets = ["." * node.level + (node.module or "")]
            else:
                continue

            for target in targets:
                module = target.split('.')[0] if not target.startswith('.') else target
                if module not in self.allowed_imports:
                    raise ValueError(
                        "Code validation error: "
                        f"Import of '{module}' is not allowed. Allowed imports: {self.allowed_imports}"
                    )
        return True

    def execute_code(self, code: str, globals_dict: Dict[str, Any] = None) -> Tuple[Any, str]:
        """Execute *code* and return ``(image_path_or_None, captured_output)``.

        Args:
            code: Python source to run.
            globals_dict: Namespace the code runs in. It is mutated: every
                allowed module that can be imported is bound under its own name.

        Returns:
            A tuple of the path to a PNG of the current matplotlib figure (or
            None when the code created no figure) and the text printed to
            stdout. On any failure the first element is None and the second is
            an ``"Error executing code:"`` message.
        """
        if globals_dict is None:
            globals_dict = {}

        # Pre-bind each allowed module that is installed so generated code can
        # refer to it by its full name without importing it first.
        for module in self.allowed_imports:
            try:
                globals_dict[module] = __import__(module)
            except ImportError:
                pass

        # Redirect stdout to capture print outputs
        old_stdout = sys.stdout
        redirected_output = StringIO()
        sys.stdout = redirected_output

        try:
            # Validate imports first
            self.validate_imports(code)

            # Execute the code
            exec(code, globals_dict)
            output = redirected_output.getvalue()

            # Handle matplotlib figures. pyplot has no get_figs(); the list of
            # open figure numbers is the supported way to detect figures.
            if plt.get_fignums():
                # Reserve the path and close the handle before matplotlib
                # writes to it, so the file is not opened twice.
                with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
                    image_path = tmp.name
                plt.savefig(image_path)
                return image_path, output

            return None, output

        except Exception as e:
            return None, f"Error executing code:\n{str(e)}"
        finally:
            sys.stdout = old_stdout
            # Close figures on every path so a failed run cannot leak its
            # figures into the next call's result.
            plt.close('all')
81
 
82
def query_api(prompt: str, api_url: str, api_key: str, system_prompt: str) -> str:
    """Send a prompt to the specified chat-completions API and return the reply text.

    Args:
        prompt: User message content.
        api_url: Full URL of the endpoint to POST to.
        api_key: Bearer token placed in the Authorization header.
        system_prompt: Content of the system message sent before the prompt.

    Returns:
        The ``choices[0].message.content`` field of the JSON response, or a
        string starting with ``"API Error:"`` when the request fails or the
        response does not have that shape. No exception is raised for those
        failures.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ]
    }

    try:
        # (connect, read) timeouts: without them a stalled server blocks the
        # calling worker forever.
        response = requests.post(api_url, headers=headers, json=payload, timeout=(10, 120))
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.RequestException as e:
        return f"API Error: {str(e)}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # A 2xx response whose body is not JSON, or lacks the expected fields.
        return f"API Error: unexpected response format ({e!r})"
102
 
103
def _extract_python_code(response_text: str) -> str:
    """Return the Python source contained in a model reply.

    Chat models usually wrap code in Markdown fences. When fenced blocks tagged
    as Python (or untagged) are present, their contents are joined and
    returned; otherwise the reply is returned unchanged.
    """
    import re

    fence = re.compile(r"```([^\n`]*)\n(.*?)```", re.DOTALL)
    snippets = [
        body
        for tag, body in fence.findall(response_text)
        if tag.strip().lower() in ("", "py", "python", "python3")
    ]
    return "\n".join(snippets) if snippets else response_text


def analyze_data(
    csv_file: str,
    api_url: str,
    api_key: str,
    system_prompt: str
) -> Tuple[str, str, str, Optional[str]]:
    """Analyze uploaded CSV data using the API and execute the generated code.

    Args:
        csv_file: The uploaded file; either an object with a ``name``
            attribute holding the path, or the path itself.
        api_url: Endpoint passed to ``query_api``.
        api_key: API key passed to ``query_api``.
        system_prompt: System prompt passed to ``query_api``.

    Returns:
        ``(status, generated_code, execution_output, visualization_path)``.
        The last three are None when no file was uploaded, when the API call
        failed, or when an unexpected error occurred.
    """
    if not csv_file:
        return "No file uploaded.", None, None, None

    try:
        # Create safe executor
        executor = SafeExecutor()

        # Read the CSV file
        df = pd.read_csv(getattr(csv_file, "name", csv_file))
        columns = df.columns.tolist()
        sample_data = df.head(3).to_dict()

        # Build the prompt
        prompt = f"""Analyze this CSV file with columns: {columns}.
        Sample data: {sample_data}

        Generate Python code that:
        1. Creates insightful visualizations using matplotlib or seaborn
        2. Performs relevant statistical analysis
        3. Identifies key patterns or insights
        4. Properly handles potential data issues

        Important: Use only these libraries: {', '.join(SAFE_IMPORTS)}"""

        # Get code from API
        api_response = query_api(prompt, api_url, api_key, system_prompt)

        # query_api reports failures as text; surface them instead of trying
        # to execute the error message as Python.
        if api_response.startswith("API Error:"):
            return api_response, None, None, None

        generated_code = _extract_python_code(api_response)

        # Create execution environment
        globals_dict = {'df': df, 'pd': pd, 'np': np, 'plt': plt, 'sns': sns}

        # Execute the code
        vis_path, execution_output = executor.execute_code(generated_code, globals_dict)

        status = "Analysis completed successfully."
        return status, generated_code, execution_output, vis_path

    except Exception as e:
        return f"Error during analysis: {str(e)}", None, None, None
149
 
150
def create_interface():
    """Create the Gradio interface

    Builds a two-column Blocks layout: inputs (API URL, API key, system
    prompt, CSV upload, analyze button) on the left and results (status,
    generated code, execution output, visualization) on the right, followed
    by usage notes. Returns the Blocks object without launching it.
    """
    with gr.Blocks() as interface:
        gr.Markdown("# AI-Powered Data Analysis Tool")

        with gr.Row():
            # Left column: everything the user supplies.
            with gr.Column():
                api_url = gr.Textbox(
                    label="API URL",
                    placeholder="Enter API endpoint URL",
                    type="text"
                )
                api_key = gr.Textbox(
                    label="API Key",
                    placeholder="Enter API key",
                    type="password"
                )
                system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system prompt for the AI",
                    value="You are an AI assistant specialized in data analysis and visualization.",
                    lines=3
                )
                csv_file = gr.File(
                    label="Upload CSV File",
                    file_types=[".csv"]
                )
                analyze_button = gr.Button("Analyze Data")

            # Right column: the four outputs of analyze_data, in return order.
            with gr.Column():
                status_output = gr.Textbox(label="Status")
                code_output = gr.Code(
                    label="Generated Code",
                    language="python"
                )
                execution_output = gr.Textbox(
                    label="Execution Output",
                    lines=10
                )
                visualization_output = gr.Image(
                    label="Visualization",
                    type="filepath"
                )

        # The order of inputs/outputs must match analyze_data's parameters and
        # its 4-tuple return value.
        analyze_button.click(
            fn=analyze_data,
            inputs=[csv_file, api_url, api_key, system_prompt],
            outputs=[status_output, code_output, execution_output, visualization_output]
        )

        gr.Markdown("""
        ## How to Use
        1. Enter your API URL and key (supports various API providers)
        2. Customize the system prompt if desired
        3. Upload a CSV file for analysis
        4. Click 'Analyze Data' to generate and execute analysis code

        The tool will:
        - Generate Python code to analyze your data
        - Execute the code safely in a controlled environment
        - Display both textual results and visualizations
        - Support common data science libraries
        """)

    return interface
 
8
  import sys
9
  from io import StringIO
10
  import matplotlib.pyplot as plt
11
+ import base64
12
+ from pathlib import Path
13
+
14
def install_package(package_name):
    """Ensure *package_name* is importable, installing it with pip if needed.

    Args:
        package_name: Top-level module or distribution name.

    Returns:
        True if the name is already importable or pip installed it
        successfully; False if the name is not a valid package name or the
        installation failed.

    NOTE(security): callers pass names taken from generated code, so this
    installs packages chosen by an untrusted source. The name is validated
    and already-importable modules are never re-fetched, but anything else is
    still downloaded from the package index.
    """
    import importlib.util
    import re
    import subprocess

    name = package_name.strip() if isinstance(package_name, str) else ""

    # Accept only a plain project name. In particular this rejects anything
    # starting with "-", so a crafted name cannot be read by pip as an option.
    if not re.fullmatch(r"[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?", name):
        return False

    # Standard-library and already-installed modules need no pip call; asking
    # pip for a name like "os" would fetch an unrelated package.
    try:
        if importlib.util.find_spec(name) is not None:
            return True
    except (ImportError, ValueError):
        pass

    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", name])
    except (subprocess.CalledProcessError, OSError):
        return False
    return True
21
+
22
def safe_execute_code(code: str, globals_dict=None):
    """Execute *code*, capturing printed output and any matplotlib figures.

    Args:
        code: Python source to run.
        globals_dict: Namespace the code runs in; a fresh dict when omitted.

    Returns:
        ``(success, output, figures)`` where *success* is a bool, *output* is
        the captured stdout (or an ``"Error executing code:"`` message on
        failure) and *figures* is a list of PNG file paths, one per open
        matplotlib figure (empty on failure).

    NOTE(security): despite the name, this runs the code through ``exec``
    with no restrictions and pip-installs whatever modules it imports. It
    must only be used where running arbitrary code is acceptable.
    """
    import ast

    if globals_dict is None:
        globals_dict = {}

    # Redirect stdout to capture print outputs
    old_stdout = sys.stdout
    redirected_output = StringIO()
    sys.stdout = redirected_output

    try:
        # First pass: collect the top-level packages named by real import
        # statements. Parsing avoids false hits on comments or strings that
        # merely contain the word "import" and handles "import a, b".
        required = set()
        for node in ast.walk(ast.parse(code)):
            if isinstance(node, ast.Import):
                required.update(alias.name.split('.')[0] for alias in node.names)
            elif isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
                required.add(node.module.split('.')[0])
        for package in sorted(required):
            install_package(package)

        # Execute the code
        exec(code, globals_dict)
        output = redirected_output.getvalue()

        # Handle any matplotlib figures. pyplot has no get_figs(); iterate
        # over the open figure numbers instead.
        figures = []
        for num in plt.get_fignums():
            # Reserve the path and close the handle before matplotlib writes
            # to it, so the file is not opened twice.
            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
                image_path = tmp.name
            plt.figure(num).savefig(image_path)
            figures.append(image_path)

        return True, output, figures
    except Exception as e:
        return False, f"Error executing code:\n{str(e)}", []
    finally:
        sys.stdout = old_stdout
        # Close figures on every path so a failed run cannot leak its figures
        # into the next call's result.
        plt.close('all')
 
62
 
63
def query_deepseek(prompt: str, api_key: str, system_prompt: str = None, timeout=(10, 300)):
    """Send a prompt to the DeepSeek chat-completions API and return the reply text.

    Args:
        prompt: User message content.
        api_key: Bearer token placed in the Authorization header.
        system_prompt: Optional system message; omitted from the request when
            empty or None.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post``. Without a timeout a stalled connection would
            block the caller indefinitely.

    Returns:
        The ``choices[0].message.content`` field of the JSON response, or a
        string starting with ``"API Error:"`` on any failure. No exception is
        raised.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": "deepseek-reasoner",
        "messages": messages,
        "stream": False
    }

    try:
        response = requests.post("https://api.deepseek.com/chat/completions",
                                 headers=headers,
                                 json=payload,
                                 timeout=timeout)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        # A 2xx response whose JSON lacks the expected fields.
        return f"API Error: unexpected response format ({e!r})"
    except Exception as e:
        return f"API Error: {str(e)}"
89
 
90
def chat_function(message, history, csv_file, api_key, system_prompt):
    """Answer one chat message, adding CSV context when a file is uploaded.

    Args:
        message: The user's message text.
        history: Prior conversation. Accepted for the Gradio callback
            signature but not forwarded to the model, so each call is
            answered without earlier turns.
        csv_file: Optional uploaded file; either an object with a ``name``
            attribute holding the path, or the path itself.
        api_key: DeepSeek API key.
        system_prompt: System prompt passed to ``query_deepseek``.

    Returns:
        The model's reply as a string, or a short explanatory message when
        the key or message is missing or the CSV cannot be read.
    """
    if not api_key:
        return "Please provide your DeepSeek API key first."

    # Do not spend an API call on an empty message.
    if not message or not message.strip():
        return "Please enter a message."

    context = ""
    if csv_file:
        try:
            df = pd.read_csv(getattr(csv_file, "name", csv_file))
        except (OSError, ValueError) as e:
            # pandas parser/empty-data errors derive from ValueError.
            return f"Could not read the CSV file: {str(e)}"
        context = f"\nContext: I have loaded a CSV file with columns: {df.columns.tolist()}\n"
        context += f"First few rows: {df.head(3).to_dict()}\n"

    full_prompt = context + message
    return query_deepseek(full_prompt, api_key, system_prompt)
104
 
105
def _extract_python_code(response_text):
    """Return the Python source contained in a model reply.

    Chat models usually wrap code in Markdown fences. When fenced blocks tagged
    as Python (or untagged) are present, their contents are joined and
    returned; otherwise the reply is returned unchanged.
    """
    import re

    fence = re.compile(r"```([^\n`]*)\n(.*?)```", re.DOTALL)
    snippets = [
        body
        for tag, body in fence.findall(response_text)
        if tag.strip().lower() in ("", "py", "python", "python3")
    ]
    return "\n".join(snippets) if snippets else response_text


def analyze_data(csv_file, api_key, system_prompt, code_request):
    """Generate and execute code for data analysis.

    Args:
        csv_file: The uploaded file; either an object with a ``name``
            attribute holding the path, or the path itself.
        api_key: DeepSeek API key.
        system_prompt: System prompt passed to ``query_deepseek``.
        code_request: The user's description of the analysis to perform.

    Returns:
        ``(status, generated_code, execution_output, figures)`` where
        *figures* is a list of image paths. *generated_code* and
        *execution_output* are None when the run did not get that far.
    """
    if not csv_file:
        return "Please upload a CSV file first.", None, None, []

    if not api_key:
        return "Please provide your DeepSeek API key.", None, None, []

    try:
        # Read the CSV file
        df = pd.read_csv(getattr(csv_file, "name", csv_file))

        # Build the prompt
        prompt = f"""I have a CSV file with columns: {df.columns.tolist()}.
        First few rows: {df.head(3).to_dict()}.

        User request: {code_request}

        Please generate Python code that:
        1. Analyzes the data according to the request
        2. Creates relevant visualizations
        3. Handles potential errors and edge cases
        4. Includes helpful comments"""

        # Get code from API
        api_response = query_deepseek(prompt, api_key, system_prompt)

        # query_deepseek reports failures as text; surface them instead of
        # trying to execute the error message as Python.
        if api_response.startswith("API Error:"):
            return api_response, None, None, []

        generated_code = _extract_python_code(api_response)

        # Set up execution environment
        globals_dict = {
            'pd': pd,
            'plt': plt,
            'df': df,
            'np': __import__('numpy')
        }

        # Execute the code
        success, execution_output, figures = safe_execute_code(generated_code, globals_dict)

        if not success:
            return f"Execution failed: {execution_output}", generated_code, None, []

        return "Analysis completed successfully.", generated_code, execution_output, figures

    except Exception as e:
        return f"Error during analysis: {str(e)}", None, None, []
150
 
151
def create_interface():
    """Create the dual-channel Gradio interface.

    Builds a Blocks layout with a sidebar (API key, system prompt, CSV
    upload) and two tabs: a chat tab backed by ``chat_function`` and a code
    generation tab backed by ``analyze_data``. Returns the Blocks object
    without launching it.
    """

    def respond(message, history, csv_file, api_key, system_prompt):
        """Adapt chat_function's plain-text reply to the Chatbot component.

        The Chatbot output expects the whole conversation, not a single
        string, so the new turn is appended to the existing history. The
        second return value clears the message box.
        """
        reply = chat_function(message, history, csv_file, api_key, system_prompt)
        # [user, assistant] pairs match the format of a default gr.Chatbot();
        # switch to role/content dicts if the component is created with
        # type="messages".
        updated_history = list(history or [])
        updated_history.append([message, reply])
        return updated_history, ""

    with gr.Blocks() as interface:
        gr.Markdown("# AI Data Analysis Assistant")

        with gr.Row():
            # Sidebar with common inputs
            with gr.Column(scale=1):
                api_key = gr.Textbox(
                    label="DeepSeek API Key",
                    type="password",
                    placeholder="Enter your API key"
                )
                system_prompt = gr.Textbox(
                    label="System Prompt",
                    value="You are an AI assistant specialized in data analysis and Python programming.",
                    lines=3
                )
                csv_file = gr.File(
                    label="Upload CSV File",
                    file_types=[".csv"]
                )

            # Main content area with tabs
            with gr.Column(scale=3):
                with gr.Tabs():
                    # Chat Interface Tab
                    with gr.TabItem("Chat"):
                        chatbot = gr.Chatbot()
                        msg = gr.Textbox(label="Your Message")
                        clear = gr.Button("Clear Chat")

                        msg.submit(
                            respond,
                            [msg, chatbot, csv_file, api_key, system_prompt],
                            [chatbot, msg]
                        )
                        clear.click(lambda: None, None, chatbot, queue=False)

                    # Code Generation Tab
                    with gr.TabItem("Code Generation"):
                        code_request = gr.Textbox(
                            label="What analysis would you like to perform?",
                            placeholder="e.g., Create a correlation matrix and visualize key relationships",
                            lines=3
                        )
                        analyze_button = gr.Button("Generate & Execute Code")

                        with gr.Row():
                            with gr.Column():
                                status_output = gr.Textbox(label="Status")
                                code_output = gr.Code(
                                    label="Generated Code",
                                    language="python"
                                )
                                execution_output = gr.Textbox(
                                    label="Execution Output",
                                    lines=10
                                )
                            with gr.Column():
                                gallery = gr.Gallery(
                                    label="Visualizations",
                                    columns=2,
                                    rows=2,
                                    height="auto"
                                )

                        # The order of inputs/outputs must match analyze_data's
                        # parameters and its 4-tuple return value.
                        analyze_button.click(
                            analyze_data,
                            inputs=[csv_file, api_key, system_prompt, code_request],
                            outputs=[status_output, code_output, execution_output, gallery]
                        )

        gr.Markdown("""
        ## How to Use
        1. Enter your DeepSeek API key
        2. Upload a CSV file for analysis
        3. Use either:
           - Chat tab: Have a conversation about your data
           - Code Generation tab: Get executable Python code for specific analyses

        The tool will:
        - Generate and execute Python code
        - Create visualizations
        - Allow interactive exploration of your data
        """)

    return interface