jzou19950715's picture
Update app.py
21df540 verified
raw
history blame
5.28 kB
import os
from typing import Optional
import gradio as gr
import pandas as pd
from minimal_agent import MinimalAgent
def analyze_dataframe(df: pd.DataFrame, analysis_type: str) -> str:
"""Basic DataFrame analysis"""
if analysis_type == "summary":
return str(df.describe())
elif analysis_type == "info":
buffer = []
df.info(buf=buffer)
return "\n".join(buffer)
return "Unknown analysis type"
def plot_data(df: pd.DataFrame, plot_type: str) -> None:
"""Basic plotting function"""
import matplotlib.pyplot as plt
import seaborn as sns
if plot_type == "correlation":
plt.figure(figsize=(10, 8))
sns.heatmap(df.corr(), annot=True)
plt.title("Correlation Heatmap")
elif plot_type == "distribution":
df.hist(figsize=(15, 10))
plt.tight_layout()
def process_file(file: gr.File) -> Optional[pd.DataFrame]:
"""Process uploaded file into DataFrame"""
if not file:
return None
try:
if file.name.endswith('.csv'):
return pd.read_csv(file.name)
elif file.name.endswith(('.xlsx', '.xls')):
return pd.read_excel(file.name)
except Exception as e:
print(f"Error reading file: {str(e)}")
return None
def analyze_data(
file: gr.File,
query: str,
api_key: str,
temperature: float = 0.7,
) -> str:
"""Process user request and generate analysis"""
if not api_key:
return "Error: Please provide an API key."
if not file:
return "Error: Please upload a file."
try:
# Set up environment
os.environ["OPENAI_API_KEY"] = api_key
# Create agent
agent = MinimalAgent(
model_id="gpt-4o-mini",
temperature=temperature
)
# Add tools
agent.add_tool(
"analyze_dataframe",
"Analyze DataFrame with various metrics",
analyze_dataframe
)
agent.add_tool(
"plot_data",
"Create various plots from DataFrame",
plot_data
)
# Process file
df = process_file(file)
if df is None:
return "Error: Could not process file."
# Build context
file_info = f"""
File: {file.name}
Shape: {df.shape}
Columns: {', '.join(df.columns)}
Column Types:
{chr(10).join([f'- {col}: {dtype}' for col, dtype in df.dtypes.items()])}
"""
# Run analysis
prompt = f"""
{file_info}
The data is loaded in a pandas DataFrame called 'df'.
User request: {query}
Please analyze the data and provide:
1. A clear explanation of your approach
2. Code for the analysis
3. Visualizations where relevant
4. Key insights and findings
"""
return agent.run(prompt)
except Exception as e:
return f"Error occurred: {str(e)}"
def create_interface():
"""Create Gradio interface"""
with gr.Blocks(title="AI Data Analysis Assistant") as interface:
gr.Markdown("""
# AI Data Analysis Assistant
Upload your data file and ask questions in natural language.
**Features:**
- Data analysis and visualization
- Statistical analysis
- Machine learning capabilities
**Note**: Requires your own GPT-4 API key.
""")
with gr.Row():
with gr.Column():
file = gr.File(
label="Upload Data File",
file_types=[".csv", ".xlsx", ".xls"]
)
query = gr.Textbox(
label="What would you like to analyze?",
placeholder="e.g., Create visualizations showing relationships between variables",
lines=3
)
api_key = gr.Textbox(
label="API Key (Required)",
placeholder="Your API key",
type="password"
)
temperature = gr.Slider(
label="Temperature",
minimum=0.0,
maximum=1.0,
value=0.7,
step=0.1
)
analyze_btn = gr.Button("Analyze")
with gr.Column():
output = gr.Markdown(label="Output")
analyze_btn.click(
analyze_data,
inputs=[file, query, api_key, temperature],
outputs=output
)
gr.Examples(
examples=[
[None, "Show key statistics and create visualizations for numeric columns"],
[None, "Find correlations and patterns in the data"],
[None, "Identify outliers and unusual patterns"],
[None, "Create summary visualizations of the main variables"],
],
inputs=[file, query]
)
return interface
if __name__ == "__main__":
interface = create_interface()
interface.launch()