Spaces:

jzou19950715
/

Huggingface_AI_Agent_Function_Testing

Sleeping

App Files Files Community

jzou19950715 committed on Jan 24

Commit

be8a1ca

verified ·

1 Parent(s): 05c2a98

Update app.py

Browse files

Files changed (1) hide show

app.py +169 -123

app.py CHANGED Viewed

@@ -3,139 +3,195 @@ from typing import List, Optional, Union
 import gradio as gr
 import pandas as pd
-from dotenv import load_dotenv
-from pandas import DataFrame
 from smolagents import CodeAgent, LiteLLMModel, tool
-# Load environment variables
-load_dotenv()
-def create_agent():
-    """Create a CodeAgent instance with GPT-4 backend."""
-    model = LiteLLMModel(model_id="gpt-4o-mini")
-    @tool
-    def read_csv(filepath: str) -> DataFrame:
-        """
-        Read a CSV file and return a pandas DataFrame.
-        Args:
-            filepath: Path to the CSV file
-        """
-        return pd.read_csv(filepath)
-    @tool
-    def read_excel(filepath: str) -> DataFrame:
-        """
-        Read an Excel file and return a pandas DataFrame.
-        Args:
-            filepath: Path to the Excel file
-        """
-        return pd.read_excel(filepath)
-    agent = CodeAgent(
-        tools=[read_csv, read_excel],
-        model=model,
-        additional_authorized_imports=[
-            "pandas",
-            "numpy",
-            "matplotlib",
-            "seaborn",
-            "plotly",
-            "sklearn",
-            "scipy",
-        ],
-        max_steps=5,
-        verbosity_level=1
-    )
-    return agent
-def process_request(
-    files: Union[str, List[str]],
-    user_query: str,
-    api_key: str = "",
-    temperature: float = 0.7,
-    history: Optional[List[tuple]] = None
-) -> tuple:
     """
-    Process user request with uploaded files and query.
     Args:
-        files: Path or list of paths to uploaded files
-        user_query: Natural language query from user
-        api_key: Optional API key for GPT-4
-        temperature: Model temperature
-        history: Chat history
-    Returns:
-        Tuple of (output, error, new_history)
     """
-    if api_key:
-        os.environ["OPENAI_API_KEY"] = api_key
     try:
-        # Create agent instance
-        agent = create_agent()
-        # Build context from files
-        file_context = ""
-        if isinstance(files, str):
-            files = [files]
-        for file in files:
-            filename = os.path.basename(file)
-            file_context += f"File uploaded: {filename}\n"
-        # Build complete prompt
         prompt = f"""
-        {file_context}
-        User request: {user_query}
         Please analyze the data and provide:
-        1. Code to perform the analysis
-        2. Explanation of approach
-        3. Visualizations if relevant
         4. Key insights and findings
-        """
-        # Execute agent
-        result = agent.run(prompt)
-        # Update history
-        new_history = history or []
-        new_history.append((user_query, result))
-        return result, None, new_history
     except Exception as e:
-        return None, str(e), history
-# Create Gradio interface
 def create_interface():
-    """Create Gradio interface for the AI coding assistant."""
-    with gr.Blocks(title="AI Coding Assistant") as interface:
         gr.Markdown("""
-        # AI Coding Assistant
-        Upload data files and ask questions in natural language to get code, analysis and visualizations.
         """)
         with gr.Row():
             with gr.Column():
-                files = gr.File(
-                    label="Upload Data Files",
-                    file_types=[".csv", ".xlsx", ".xls"],
-                    multiple=True
                 )
                 query = gr.Textbox(
                     label="What would you like to analyze?",
-                    placeholder="e.g., Create a scatter plot comparing column A vs B"
                 )
                 api_key = gr.Textbox(
-                    label="API Key (Optional)",
-                    placeholder="Your OpenAI API key",
                     type="password"
                 )
                 temperature = gr.Slider(
@@ -145,39 +201,29 @@ def create_interface():
                     value=0.7,
                     step=0.1
                 )
-                submit = gr.Button("Analyze")
             with gr.Column():
                 output = gr.Markdown(label="Output")
-                error = gr.Markdown(label="Errors")
-        # Hidden state for chat history
-        history = gr.State([])
         # Handle submissions
-        submit.click(
-            process_request,
-            inputs=[files, query, api_key, temperature, history],
-            outputs=[output, error, history]
         )
-        # Add examples
         gr.Examples(
             examples=[
-                [
-                    None,
-                    "Create a scatter plot showing the relationship between column A and B, with a trend line",
-                ],
-                [
-                    None,
-                    "Calculate summary statistics and identify any outliers in the numerical columns",
-                ],
-                [
-                    None,
-                    "Perform clustering analysis on the data and visualize the clusters",
-                ],
             ],
-            inputs=[files, query],
         )
     return interface

 import gradio as gr
 import pandas as pd
 from smolagents import CodeAgent, LiteLLMModel, tool
+# Tool definitions to showcase smolagents capabilities
@tool
def search_web(query: str) -> str:
    """
    Simulate a web search (for demo purposes).

    No network request is made; the query is echoed back inside a fixed
    placeholder string.

    Args:
        query: Search terms to look up

    Returns:
        The text "Simulated web search results for: " followed by the query.
    """
    # The Args entry above is required: the @tool decorator builds the tool's
    # input schema from the docstring and rejects undocumented parameters.
    return f"Simulated web search results for: {query}"
@tool
def analyze_dataframe(df: pd.DataFrame, analysis_type: str) -> str:
    """
    Analyze a pandas DataFrame based on the specified analysis type.

    Args:
        df: DataFrame to analyze
        analysis_type: Either "summary" (descriptive statistics) or "info" (column and dtype overview)

    Returns:
        The requested report as text, or "Unknown analysis type" for any
        other analysis_type.
    """
    # NOTE(review): some smolagents versions may reject a non-JSON type hint
    # such as pd.DataFrame when building the tool schema - confirm against the
    # pinned version.
    if analysis_type == "summary":
        return str(df.describe())
    if analysis_type == "info":
        # DataFrame.info() writes its report to a buffer (stdout by default)
        # and returns None, so capture the text instead of str()-ing None.
        import io

        report = io.StringIO()
        df.info(buf=report)
        return report.getvalue()
    return "Unknown analysis type"
@tool
def plot_data(df: pd.DataFrame, plot_type: str) -> None:
    """
    Create plots from a DataFrame.

    The plot is drawn with matplotlib's pyplot interface; this function does
    not show, save or return the figure. Any plot_type other than the two
    listed below draws nothing.

    Args:
        df: DataFrame to plot
        plot_type: Either "correlation" (heatmap of pairwise correlations) or "distribution" (histograms)
    """
    # Imported lazily so the plotting stack is only loaded when a plot is
    # actually requested.
    import matplotlib.pyplot as plt
    import seaborn as sns

    if plot_type == "correlation":
        # Correlation is only defined for numeric data; recent pandas raises
        # from corr() when a column (e.g. strings) cannot be converted.
        numeric_columns = df.select_dtypes(include="number")
        plt.figure(figsize=(10, 8))
        sns.heatmap(numeric_columns.corr(), annot=True)
        plt.title("Correlation Heatmap")
    elif plot_type == "distribution":
        df.hist(figsize=(15, 10))
        plt.tight_layout()
def process_files(files: Optional[Union[str, List[str]]]) -> Optional[pd.DataFrame]:
    """
    Load uploaded CSV/Excel files into a single DataFrame.

    Args:
        files: One upload or a list of uploads. Each upload is either a path
            (string or path-like) or an object exposing the path as ``.name``.

    Returns:
        The parsed DataFrame, or None when nothing was uploaded or no upload
        could be read. When several files are read, their rows are
        concatenated and the index is renumbered from 0.
    """
    import os

    if not files:
        return None

    # A single-file upload component hands over one item rather than a list;
    # iterating a bare path string would walk it character by character.
    if isinstance(files, (str, os.PathLike)) or hasattr(files, "name"):
        files = [files]

    frames = []
    for upload in files:
        if isinstance(upload, (str, os.PathLike)):
            path = str(os.fspath(upload))
        else:
            path = str(getattr(upload, "name", ""))

        # Compare case-insensitively so "DATA.CSV" is treated like "data.csv".
        extension_probe = path.lower()
        try:
            if extension_probe.endswith(".csv"):
                frame = pd.read_csv(path)
            elif extension_probe.endswith((".xlsx", ".xls")):
                frame = pd.read_excel(path)
            else:
                # Unsupported extension: ignore this upload.
                continue
        except Exception as e:
            # Best effort: report the unreadable file and keep going with the
            # remaining uploads.
            print(f"Error reading {path}: {str(e)}")
            continue
        frames.append(frame)

    if not frames:
        return None
    if len(frames) == 1:
        return frames[0]
    # ignore_index avoids duplicate row labels across the source files.
    return pd.concat(frames, ignore_index=True)
def analyze_data(
    files: Union[str, List[str]],
    query: str,
    api_key: str,
    temperature: float = 0.7,
) -> str:
    """
    Process a user request and generate an analysis using smolagents.

    Args:
        files: One upload or a list of uploads (paths, or objects exposing
            the path as ``.name``), as delivered by the file component.
        query: Natural-language description of the analysis to perform.
        api_key: API key passed to the model backend.
        temperature: Sampling temperature passed to the model.

    Returns:
        The agent's answer as text, or a message starting with "Error" when
        input is missing, the files cannot be read, or the run fails.
    """
    if not api_key:
        return "Error: Please provide an API key."
    if not files:
        return "Error: Please upload at least one file."

    # A single-file upload component delivers one item, not a list.
    uploads = list(files) if isinstance(files, (list, tuple)) else [files]

    try:
        # Load the data first so an unreadable upload is reported before any
        # model or agent is set up.
        df = process_files(uploads)
        if df is None:
            return "Error: Could not process uploaded files."

        # Hand the key to the model directly rather than exporting it through
        # os.environ, which is shared by every request served by the process.
        model = LiteLLMModel(
            model_id="gpt-4o-mini",
            api_key=api_key,
            temperature=temperature
        )
        # Create agent with various tools to showcase capabilities
        agent = CodeAgent(
            tools=[search_web, analyze_dataframe, plot_data],
            model=model,
            additional_authorized_imports=[
                "pandas",
                "numpy",
                "matplotlib",
                "seaborn",
                "plotly",
                "sklearn",
                "scipy"
            ],
            max_steps=5,
            verbosity_level=1
        )

        # Build context; column labels are str()-ed because they need not be
        # strings (e.g. integer headers).
        file_info = "\n".join([
            "Uploaded files:",
            *[f"- {getattr(f, 'name', f)}" for f in uploads],
            f"\nDataFrame Shape: {df.shape}",
            f"Columns: {', '.join(map(str, df.columns))}",
            "\nColumn Types:",
            *[f"- {col}: {dtype}" for col, dtype in df.dtypes.items()]
        ])
        # Build prompt
        prompt = f"""
        {file_info}
        The data has been loaded into a pandas DataFrame called 'df'.
        Available tools:
        - search_web: Search for relevant information
        - analyze_dataframe: Perform basic DataFrame analysis
        - plot_data: Create various plots
        Additional capabilities:
        - Full pandas, numpy, matplotlib, seaborn access
        - Machine learning with sklearn
        - Statistical analysis with scipy
        User request: {query}
        Please analyze the data and provide:
        1. A clear explanation of your approach
        2. Code for the analysis
        3. Visualizations where relevant
        4. Key insights and findings
        Make use of the available tools and libraries to provide comprehensive analysis.
        """
        # Run analysis; the DataFrame is passed as the 'df' the prompt refers to.
        result = agent.run(prompt, additional_args={"df": df})
        # The agent's final answer is not guaranteed to be a string, but the
        # output component renders text.
        return str(result)
    except Exception as e:
        # UI boundary: surface any failure as a message instead of raising.
        return f"Error occurred: {str(e)}"
 def create_interface():
+    """Create Gradio interface."""
+    with gr.Blocks(title="AI Agent Testing Interface") as interface:
         gr.Markdown("""
+        # AI Agent Testing Interface
+        Test the capabilities of AI agents using smolagents library. Upload data files and ask questions in natural language.
+        **Features:**
+        - Data analysis and visualization
+        - Machine learning capabilities
+        - Web search simulation
+        - Statistical analysis
+        - Custom tool integration
+        **Note**: Requires your own API key for GPT-4.
         """)
         with gr.Row():
             with gr.Column():
+                file = gr.File(
+                    label="Upload Data Files (CSV/Excel)",
+                    file_types=[".csv", ".xlsx", ".xls"]
                 )
                 query = gr.Textbox(
                     label="What would you like to analyze?",
+                    placeholder="e.g., Analyze the relationships between variables and create visualizations",
+                    lines=3
                 )
                 api_key = gr.Textbox(
+                    label="API Key (Required)",
+                    placeholder="Your API key",
                     type="password"
                 )
                 temperature = gr.Slider(
                     value=0.7,
                     step=0.1
                 )
+                analyze_btn = gr.Button("Analyze")
             with gr.Column():
                 output = gr.Markdown(label="Output")
         # Handle submissions
+        analyze_btn.click(
+            analyze_data,
+            inputs=[file, query, api_key, temperature],
+            outputs=output
         )
+        # Example queries
         gr.Examples(
             examples=[
+                [None, "Perform comprehensive exploratory data analysis including distributions, correlations, and key statistics"],
+                [None, "Create visualizations showing relationships between numeric variables"],
+                [None, "Identify and analyze outliers in the dataset"],
+                [None, "Perform clustering analysis and visualize the results"],
+                [None, "Calculate summary statistics and create box plots for numeric columns"],
+                [None, "Analyze trends and patterns in the data over time"],
             ],
+            inputs=[file, query]
         )
     return interface