Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| # chat.py | |
| import gradio as gr | |
| import json | |
| import pandas as pd | |
| import numpy as np | |
| from functools import lru_cache | |
| import promptquality as pq | |
# Name of the promptquality project whose runs this module reads.
project_name = "agent-lb-v1"
# Resolved once, when the module is imported; the run and row lookups below
# are all scoped to this project id.
PROJECT_ID = pq.get_project_from_name(project_name).id
def get_model_score_for_dataset(model, dataset):
    """Collect tool-selection-quality metrics for one model/dataset run.

    The run is looked up by the name ``"{model} {dataset}"`` inside
    ``PROJECT_ID`` and at most 1000 of its rows are requested.  Rows whose
    ``tool_selection_quality_rationale`` is falsy are left out of every
    returned list.

    Args:
        model: Model name; first half of the run name.
        dataset: Dataset name; second half of the run name.

    Returns:
        A dict with the keys ``"mean_score"`` (mean of the kept scores,
        rounded to two decimals), ``"scores"`` (each rounded to two
        decimals), ``"rationales"`` and ``"explanations"``.  The three lists
        are parallel: index ``i`` refers to the same kept row in each.
    """
    print(f"Getting metrics for {model} {project_name} for dataset {dataset}")

    run = pq.get_run_from_name(f"{model} {dataset}", PROJECT_ID)
    fetched_rows = pq.get_rows(
        project_id=PROJECT_ID,
        run_id=run.id,
        task_type=None,
        config=None,
        starting_token=0,
        limit=1000,
    )

    kept_scores = []
    kept_rationales = []
    kept_explanations = []
    for row in fetched_rows:
        rationale = row.metrics.tool_selection_quality_rationale
        if not rationale:
            # Rows without a rationale are excluded from all three lists.
            continue
        kept_scores.append(round(row.metrics.tool_selection_quality, 2))
        kept_explanations.append(row.metrics.tool_selection_quality_explanation)
        kept_rationales.append(rationale)

    # NOTE(review): with no kept rows np.mean is given an empty list, which
    # numpy reports as nan (plus a RuntimeWarning) -- confirm callers cope.
    average = round(np.mean(kept_scores), 2)

    return {
        "mean_score": average,
        "scores": kept_scores,
        "rationales": kept_rationales,
        "explanations": kept_explanations,
    }
def get_updated_df(df, data):
    """Attach the metric lists in ``data`` to ``df`` as new columns.

    ``df`` is modified in place and the same object is returned.  Columns are
    written in the order ``rationale``, ``explanation``, ``score`` from the
    ``data`` keys ``rationales``, ``explanations`` and ``scores``.

    NOTE(review): each list is assigned as a whole column, so it presumably
    has to match ``len(df)``; ``get_model_score_for_dataset`` drops rows that
    have no rationale, so confirm the lengths always agree.
    """
    column_sources = (
        ("rationale", "rationales"),
        ("explanation", "explanations"),
        ("score", "scores"),
    )
    for column, key in column_sources:
        df[column] = data[key]
    return df
def get_chat_and_score_df(model, dataset):
    """Load a dataset's parquet file and add the model's metric columns.

    The metrics are fetched first via ``get_model_score_for_dataset``; the
    frame is then read from ``datasets/{dataset}.parquet`` and passed through
    ``get_updated_df``, whose result is returned.
    """
    metrics = get_model_score_for_dataset(model, dataset)
    conversations = pd.read_parquet(f"datasets/{dataset}.parquet")
    return get_updated_df(conversations, metrics)
def format_chat_message(role, content):
    """Render one chat message as an HTML fragment.

    Both ``role`` and ``content`` are HTML-escaped before being interpolated,
    so message text containing ``<``, ``>`` or ``&`` (for example XML-like
    tool-call tags) is displayed literally instead of being parsed as markup.

    Args:
        role: Speaker label.  Its lower-cased form is used for the CSS
            classes ``<role>`` and ``<role>-role``.
        content: Message body.  Non-string values are rendered through
            ``str()``, matching what f-string interpolation would show.

    Returns:
        A ``<div class="message ...">`` block containing a role badge and the
        message content.
    """
    # Imported locally so this function needs no change to the file's
    # import section.
    import html

    role_text = str(role)
    # The lower-cased role lands inside a class attribute, so quotes are
    # escaped as well (html.escape's default).
    role_style = html.escape(role_text.lower())
    safe_role = html.escape(role_text)
    safe_content = html.escape(str(content))
    return f"""
    <div class="message {role_style}">
        <div class="role-badge {role_style}-role">{safe_role}</div>
        <div class="content">{safe_content}</div>
    </div>
    """
def format_tool_info(tools):
    """Render the tools available for a conversation as an HTML panel.

    Args:
        tools: Either an iterable of dict-like tool descriptions or a JSON
            string that decodes to one.  Each tool is read through
            ``.get('name')``, ``.get('description')`` and
            ``.get('parameters')``.

    Returns:
        ``<div class="tool-info-panel">`` wrapping one ``tool-section`` per
        tool, or a "No tool information available" placeholder when
        ``tools`` is empty/``None`` or is a string that is not valid JSON.
    """
    # Imported locally so this function needs no change to the file's
    # import section.
    import html

    placeholder = "<div>No tool information available</div>"

    if isinstance(tools, str):
        try:
            tools = json.loads(tools)
        except ValueError:
            # json.JSONDecodeError is a ValueError; catching only that keeps
            # the best-effort fallback without hiding unrelated failures
            # (KeyboardInterrupt, programming errors) as a bare except would.
            return placeholder

    if not tools:
        return placeholder

    sections = []
    for tool in tools:
        # Name and description are free text, so escape them; the parameter
        # block is already HTML produced by format_parameters.
        name = html.escape(str(tool.get('name', 'Unnamed Tool')), quote=False)
        description = html.escape(
            str(tool.get('description', 'No description available')),
            quote=False,
        )
        parameters_html = format_parameters(tool.get('parameters', {}))
        sections.append(f"""
        <div class="tool-section">
            <div class="tool-name">{name}</div>
            <div class="tool-description">{description}</div>
            <div class="tool-parameters">
                {parameters_html}
            </div>
        </div>
        """)

    tool_html = "".join(sections)
    return f'<div class="tool-info-panel">{tool_html}</div>'
def format_parameters(parameters):
    """Render a tool's parameters as a run of HTML ``parameter`` divs.

    Args:
        parameters: Mapping of parameter name to its description.  Values
            that are not strings are shown through ``str()``.

    Returns:
        One ``<div class="parameter">`` per entry, concatenated, or
        ``"<div>No parameters</div>"`` when ``parameters`` is empty or
        ``None``.  Names and descriptions are HTML-escaped so characters such
        as ``<`` and ``&`` are displayed literally.
    """
    # Imported locally so this function needs no change to the file's
    # import section.
    import html

    if not parameters:
        return "<div>No parameters</div>"

    # Build all fragments first and join once instead of repeated "+=".
    return "".join(
        f"""
        <div class="parameter">
            <span class="param-name">{html.escape(str(name), quote=False)}:</span> {html.escape(str(desc), quote=False)}
        </div>
        """
        for name, desc in parameters.items()
    )
def format_metrics(score, rationale, explanation):
    """Render the score, rationale and explanation for one row as HTML.

    Args:
        score: Numeric score; formatted with two decimals.
        rationale: Free-text rationale for the score.
        explanation: Free-text explanation for the score.

    Returns:
        A ``<div class="metrics-panel">`` with three ``metric-section``
        blocks.  ``rationale`` and ``explanation`` are HTML-escaped so that
        text containing ``<``, ``>`` or ``&`` is displayed literally rather
        than interpreted as markup.
    """
    # Imported locally so this function needs no change to the file's
    # import section.
    import html

    safe_rationale = html.escape(str(rationale), quote=False)
    safe_explanation = html.escape(str(explanation), quote=False)
    return f"""
    <div class="metrics-panel">
        <div class="metric-section">
            <h3>Score</h3>
            <div class="score-display">{score:.2f}</div>
        </div>
        <div class="metric-section">
            <h3>Rationale</h3>
            <div class="explanation-text">{safe_rationale}</div>
        </div>
        <div class="metric-section">
            <h3>Explanation</h3>
            <div class="explanation-text">{safe_explanation}</div>
        </div>
    </div>
    """
def update_chat_display(df, index):
    """Build the three HTML panels for the row at position ``index``.

    Returns a ``(chat_html, metrics_html, tool_html)`` tuple.  When ``df`` is
    ``None``, is empty, or ``index`` is at or beyond its length, three
    placeholder ``<div>`` strings are returned instead.

    The selected row is read through its ``conversation`` (a JSON list of
    messages with ``role`` and ``content``), ``score``, ``rationale``,
    ``explanation`` and ``tools_langchain`` fields.
    """
    row_exists = df is not None and not df.empty and index < len(df)
    if not row_exists:
        return (
            "<div>No data available</div>",
            "<div>No metrics available</div>",
            "<div>No tool information available</div>",
        )

    record = df.iloc[index]

    # Chat panel: one rendered fragment per decoded message, in order.
    rendered_messages = "".join(
        format_chat_message(message["role"], message["content"])
        for message in json.loads(record["conversation"])
    )
    chat_html = f"""
    <div class="chat-panel">
    {rendered_messages}
    </div>
    """

    # Metrics panel.
    metrics_html = format_metrics(
        record["score"], record["rationale"], record["explanation"]
    )

    # Tool panel.
    tool_html = format_tool_info(record["tools_langchain"])

    return chat_html, metrics_html, tool_html
def filter_and_update_display(model, dataset, selected_scores, current_index):
    """Reload a model/dataset frame, filter it by score and render one row.

    Args:
        model: Model name passed to ``get_chat_and_score_df``.
        dataset: Dataset name passed to ``get_chat_and_score_df``.
        selected_scores: Scores to keep; when falsy no filtering is applied.
        current_index: Requested row position; clamped to the last row of
            the filtered frame.

    Returns:
        A 5-tuple ``(chat_html, metrics_html, tool_html, index_update,
        position_label)`` where ``index_update`` is a ``gr.update`` carrying
        the new ``maximum`` and ``value`` and ``position_label`` reads
        ``"<current>/<total>"``.  If the filtered frame is empty, or any
        exception is raised, placeholder panels are returned together with
        ``gr.update(maximum=0, value=0)`` and ``"0/0"``; an exception is also
        printed and its text shown in the first panel.
    """
    no_metrics = "<div>No metrics available</div>"
    no_tools = "<div>No tool information available</div>"

    try:
        # Load the scored conversations, then narrow to the chosen scores.
        frame = get_chat_and_score_df(model, dataset)
        if selected_scores:
            frame = frame[frame["score"].isin(selected_scores)]

        if frame.empty:
            return (
                "<div>No data available for selected filters</div>",
                no_metrics,
                no_tools,
                gr.update(maximum=0, value=0),
                "0/0",
            )

        # Keep the requested position within the filtered frame.
        total = len(frame)
        last_index = total - 1
        current_index = min(current_index, last_index)

        chat_html, metrics_html, tool_html = update_chat_display(
            frame, current_index
        )
        return (
            chat_html,
            metrics_html,
            tool_html,
            gr.update(maximum=last_index, value=current_index),
            f"{current_index + 1}/{total}",
        )
    except Exception as e:
        # UI boundary: report the failure in the panel instead of raising.
        message = str(e)
        print(f"Error in filter_and_update_display: {message}")
        return (
            f"<div>Error: {message}</div>",
            no_metrics,
            no_tools,
            gr.update(maximum=0, value=0),
            "0/0",
        )