MNB-Leaderboard / app.py
TheFey's picture
Upload 5 files
4902e39 verified
import dash
from dash import html, dcc, Input, Output, State
import dash_ag_grid as dag
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import base64
import os
def _is_base_flag(value):
    """Return True only for a present, truthy ``Base`` cell.

    Missing cells are read as NaN, and NaN is truthy in Python, so it has to
    be ruled out explicitly before the truthiness test.
    """
    return (not pd.isna(value)) and bool(value)


def load_leaderboard_data(csv_file_path):
    """Load the leaderboard CSV and prepare it for the grid.

    Args:
        csv_file_path: Path of the CSV file to read. The literal string
            ``NA`` is treated as a missing value.

    Returns:
        A DataFrame with the helper columns ``model_type_sort``,
        ``Model_Link``, ``Model_Display``, ``pinned`` and ``selected`` added,
        numeric columns rounded to 3 decimals, and rows sorted by ``Score``,
        ``8k 🪡`` and ``16k 🪡`` (all descending). If anything goes wrong the
        error is printed and an empty DataFrame is returned instead.
    """
    try:
        df = pd.read_csv(csv_file_path, na_values=['NA'])

        # Type sort value: 0 for base models ("B"), 4 for everything else.
        # A missing Base cell counts as "not base".
        df['model_type_sort'] = df['Base'].apply(
            lambda flag: 0 if _is_base_flag(flag) else 4
        )

        # Store model name and link separately
        df['Model_Link'] = df['Model Link'].fillna('')
        df['Model_Display'] = df['author/model_name']

        # Add pinned and selected columns
        df['pinned'] = False
        df['selected'] = False

        # Round numeric columns and handle NA values
        numeric_columns = df.select_dtypes(include=[np.number]).columns
        for col in numeric_columns:
            df[col] = df[col].apply(
                lambda x: None if pd.isna(x) else round(x, 3)
            )

        # Sort with multiple keys in the required order: Score -> 8k -> 16k
        df = df.sort_values(
            by=['Score', '8k 🪡', '16k 🪡'],
            ascending=[False, False, False],
        )
        return df
    except Exception as e:
        # Deliberate best-effort: the app still starts with an empty grid.
        print(f"Error loading CSV file: {e}")
        return pd.DataFrame()
# Create the Dash application; Font Awesome is pulled in as an external
# stylesheet.
_EXTERNAL_STYLESHEETS = [
    "https://use.fontawesome.com/releases/v5.15.4/css/all.css",
]
app = dash.Dash(__name__, external_stylesheets=_EXTERNAL_STYLESHEETS)

# Module-level handle to the app's underlying server object.
server = app.server
# Custom CSS
app.index_string = '''
<!DOCTYPE html>
<html>
<head>
{%metas%}
<title>Fey's Multi-Needle & Behavior Leaderboard</title>
{%favicon%}
{%css%}
<style>
:root {
--bg-color: #ffffff;
--text-color: #000000;
--grid-bg: #ffffff;
--grid-border: #ddd;
--link-color: #007bff;
--secondary-text: #666;
--pinned-bg: #f5f5f5;
--border-color: #ccc;
}
@media (prefers-color-scheme: dark) {
:root {
--bg-color: #0d1117;
--text-color: #e6e6e6;
--grid-bg: #161b22;
--grid-border: #30363d;
--link-color: #58a6ff;
--secondary-text: #8b949e;
--pinned-bg: #1c2128;
--border-color: #30363d;
color-scheme: dark;
}
.ag-theme-alpine .ag-menu {
background-color: #161b22 !important;
color: #e6e6e6 !important;
border-color: #30363d !important;
}
.ag-theme-alpine .ag-filter-condition {
background-color: #161b22 !important;
border-color: #30363d !important;
}
.ag-theme-alpine .ag-mini-filter input,
.ag-theme-alpine .ag-filter input {
background-color: #0d1117 !important;
color: #e6e6e6 !important;
border-color: #30363d !important;
}
.ag-theme-alpine .ag-select .ag-picker-field-wrapper {
background-color: #0d1117 !important;
color: #e6e6e6 !important;
border-color: #30363d !important;
}
.ag-theme-alpine .ag-picker-field-wrapper {
border-color: #30363d !important;
}
.ag-theme-alpine .ag-select-list {
background-color: #161b22 !important;
color: #e6e6e6 !important;
}
.ag-theme-alpine .ag-select-list-item:hover {
background-color: #1c2128 !important;
}
.ag-theme-alpine input[type="date"] {
color-scheme: dark;
background-color: #161b22;
color: #e6e6e6;
border-color: #30363d;
}
.ag-theme-alpine input[type="date"]::-webkit-calendar-picker-indicator {
background-color: #161b22;
cursor: pointer;
filter: invert(0.8);
}
}
body {
font-family: 'Segoe UI', Arial, sans-serif;
margin: 0;
padding: 20px;
background-color: var(--bg-color);
color: var(--text-color);
}
/* Header and Title Styles */
.page-title {
text-align: center;
margin: 0;
font-size: 38px;
color: var(--text-color) !important;
}
.page-subtitle {
text-align: center;
margin: 0;
font-size: 20px;
font-weight: 600;
color: var(--text-color) !important;
}
/* Filter Styles */
/* Text styling for elements with the model-type-filter class. */
.model-type-filter {
color: var(--text-color) !important;
margin-right: 10px;
font-weight: bold;
}
#model-type-filter label,
#na-model-filter label {
color: var(--text-color) !important;
margin-right: 10px;
font-weight: bold;
}
/* Grid Styles */
.ag-theme-alpine {
--ag-font-family: 'Segoe UI', Arial, sans-serif;
--ag-font-size: 14px;
--ag-background-color: var(--grid-bg);
--ag-border-color: var(--grid-border);
--ag-header-background-color: var(--grid-bg);
--ag-odd-row-background-color: var(--grid-bg);
--ag-header-foreground-color: var(--text-color);
--ag-foreground-color: var(--text-color);
--ag-row-border-color: var(--grid-border);
}
.ag-theme-alpine .ag-pinned-left-header,
.ag-theme-alpine .ag-cell-last-left-pinned {
border-right: 2px solid var(--grid-border) !important;
margin-right: -1px !important;
}
/* Mobile specific fixes */
.ag-theme-alpine.ag-grid-mobile .ag-pinned-left-header,
.ag-theme-alpine.ag-grid-mobile .ag-cell-last-left-pinned {
border-right: 2px solid var(--grid-border) !important;
}
/* Ensure pinned columns maintain their position */
.ag-theme-alpine .ag-pinned-left-cols-container,
.ag-theme-alpine .ag-pinned-left-header {
position: sticky;
left: 0;
z-index: 1;
}
.ag-floating-top {
border-bottom: 3px solid var(--border-color) !important;
}
.ag-floating-top:empty {
border-bottom: none !important;
}
.pinned-row {
background-color: var(--pinned-bg) !important;
font-weight: 500;
}
/* Text Alignment Classes */
.ag-left-aligned-header {
text-align: left !important;
}
.ag-left-aligned-cell {
text-align: left !important;
}
.ag-header-cell-text {
white-space: normal !important;
line-height: 1.2em;
overflow: visible;
padding-bottom: 4px;
}
.ag-header-cell {
height: auto !important;
min-height: 48px;
}
.wrap-text {
white-space: normal !important;
line-height: 1.2em;
}
.no-break {
white-space: nowrap !important;
}
/* Border Classes */
.border-left {
border-left: 2px solid var(--grid-border) !important;
margin-left: -2px !important;
}
.border-right {
border-right: 2px solid var(--grid-border) !important;
}
/* Link Styles */
.model-link {
color: var(--link-color) !important;
text-decoration: none;
}
.model-link:visited {
color: var(--link-color) !important;
}
.model-link:active {
color: var(--link-color) !important;
}
.model-link:focus {
color: var(--link-color) !important;
}
.ag-theme-alpine a,
.ag-theme-alpine a:link,
.ag-theme-alpine a:visited,
.ag-theme-alpine a:hover,
.ag-theme-alpine a:active,
.ag-theme-alpine a:focus {
color: var(--link-color) !important;
text-decoration: none !important;
}
.ag-theme-alpine a:hover {
text-decoration: underline !important;
}
.source-link {
color: var(--link-color) !important;
text-decoration: none;
}
/* Details/Summary Styles */
.details-summary {
cursor: pointer;
font-weight: bold;
font-size: 1.2em;
margin-top: 20px;
color: var(--text-color) !important;
}
.ideology-note {
color: var(--secondary-text) !important;
font-size: 0.9em;
}
/* Markdown Content */
.markdown-content {
color: var(--text-color) !important;
}
.markdown-content a {
color: var(--link-color) !important;
}
/* Ko-fi Button Visibility */
.kofi-light {
display: none;
}
.kofi-dark {
display: none;
}
@media (prefers-color-scheme: light) {
.kofi-light {
display: block;
}
}
@media (prefers-color-scheme: dark) {
.kofi-dark {
display: block;
}
/* Dark Theme Specific Overrides */
.ag-theme-alpine {
--ag-background-color: #161b22 !important;
--ag-header-background-color: #161b22 !important;
--ag-odd-row-background-color: #161b22 !important;
--ag-row-background-color: #161b22 !important;
--ag-header-foreground-color: #e6e6e6 !important;
--ag-foreground-color: #e6e6e6 !important;
--ag-row-border-color: #30363d !important;
--ag-border-color: #30363d !important;
--ag-secondary-border-color: #30363d !important;
--ag-alpine-active-color: #58a6ff !important;
--ag-selected-row-background-color: #1c2128 !important;
--ag-row-hover-color: #1c2128 !important;
}
.ag-header-cell-filtered {
background-color: rgba(88, 166, 255, 0.1) !important;
}
input[type="checkbox"] {
accent-color: var(--link-color);
}
/* Ensure text colors in dark mode */
.page-title,
.page-subtitle,
.model-type-filter label,
#model-type-filter label,
#na-model-filter label {
color: #e6e6e6 !important;
}
.filter-description,
.ideology-note {
color: #8b949e !important;
}
}
a:visited {
color: var(--link-color) !important;
}
.markdown-content a:visited {
color: var(--link-color) !important;
}
.split-header-container {
display: flex;
flex-direction: column;
line-height: 1.2em;
}
.split-header-top, .split-header-bottom {
white-space: nowrap;
}
.ag-theme-alpine .new-emoji-cell.ag-cell {
font-size: 18px !important;
display: flex !important;
align-items: center !important;
justify-content: flex-start !important;
padding-left: 12px !important;
}
.ag-ltr .ag-cell {
border-right-width: 1px;
justify-content: center;
display: flex;
}
.ag-header-cell-label {
overflow: hidden;
text-overflow: ellipsis;
justify-content: center;
}
div.ag-cell.ag-cell-not-inline-editing.ag-cell-normal-height.ag-cell-last-left-pinned {
justify-content: right;
}
</style>
</head>
<body>
{%app_entry%}
<footer>
{%config%}
{%scripts%}
{%renderer%}
</footer>
</body>
</html>
'''
# Read the leaderboard CSV once at import time; the resulting frame supplies
# the grid's row data.
df = load_leaderboard_data(
    csv_file_path="fmnb-leaderboard-data.csv",
)
# Define helper functions
def create_numeric_column(field, width=125, sort=None, sortIndex=None, **kwargs):
    """Build a grid column definition for a numeric field.

    Args:
        field: Name of the row-data field the column displays.
        width: Column width in pixels.
        sort: Optional initial sort direction; only set when truthy.
        sortIndex: Optional position in the sort order; only set when not
            None.
        **kwargs: ``filterParams`` (a dict) is merged into the default filter
            parameters. Any other keyword is copied onto the column
            definition as-is, overriding the default of the same name.

    Returns:
        A new dict describing the column.
    """
    column = {
        "field": field,
        "width": width,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "inRange",
            "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": "ag-left-aligned-cell",
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "suppressSizeToFit": True,
        "sortingOrder": ['desc', 'asc'],
        # NOTE(review): the comparator reads `__sortValue` from the row data,
        # but nothing in this file populates that key — confirm it is set
        # elsewhere, otherwise the comparison yields NaN.
        "comparator": {
            "function": """
function(valueA, valueB, nodeA, nodeB, isInverted) {
const a = nodeA.data.__sortValue;
const b = nodeB.data.__sortValue;
return a - b;
}
"""
        }
    }

    # Work on a copy so the caller's keyword mapping is left untouched.
    overrides = dict(kwargs)

    # filterParams is merged key by key rather than replaced wholesale, so
    # callers can change e.g. only the default option.
    extra_filter_params = overrides.pop('filterParams', None)
    if extra_filter_params:
        column['filterParams'].update(extra_filter_params)

    # Remaining keywords used to be dropped silently; apply them as column
    # overrides instead.
    column.update(overrides)

    if sort:
        column["sort"] = sort
    if sortIndex is not None:
        column["sortIndex"] = sortIndex
    return column
def create_text_column(field, width=120):
    """Build a grid column definition for a text field.

    Args:
        field: Name of the row-data field the column displays.
        width: Column width in pixels.

    Returns:
        A new dict describing a left-aligned, text-filtered column.
    """
    text_filter_params = {
        "defaultOption": "contains",
        "filterOptions": ['contains', 'notContains', 'startsWith', 'endsWith'],
    }

    column = {"field": field, "width": width}
    column["filter"] = "agTextColumnFilter"
    column["filterParams"] = text_filter_params
    column["headerClass"] = "ag-left-aligned-header wrap-text"
    column["cellClass"] = "ag-left-aligned-cell"
    column["wrapHeaderText"] = True
    column["autoHeaderHeight"] = True
    return column
# Column definitions for the leaderboard grid, in display order.
# The cellRenderer names used below (PinRenderer, ModelLink, ScoreRenderer,
# BehaviorRenderer, TypeRenderer) are not defined in this file; presumably
# they are registered client-side — verify against the project's assets.
columnDefs = [
    # Pin toggle, fixed to the left edge
    {
        "headerName": "📌",
        "field": "pinned",
        "width": 55,
        "filter": False,
        "suppressMenu": True,
        "cellRenderer": "PinRenderer",
        "sortable": False,
        "pinned": "left"
    },
    # Model name, also fixed to the left edge
    {
        "field": "Model_Display",
        "headerName": "Model",
        "cellRenderer": "ModelLink",
        "filter": "agTextColumnFilter",
        "filterParams": {
            "defaultOption": "contains",
            "filterOptions": ['contains', 'notContains', 'startsWith', 'endsWith']
        },
        "width": 420,
        "suppressMenu": False,
        "pinned": "left",
        "headerClass": "ag-left-aligned-header wrap-text",
        "wrapHeaderText": True,
        "autoHeaderHeight": True
    },
    {
        "field": "Score",
        # Bold score cells. This was previously spelled as a top-level
        # "FontWeight" key, which is not a column property and had no effect.
        "cellStyle": {"fontWeight": 700},
        "width": 110,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "greaterThanOrEqual"
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": ["ag-left-aligned-cell", "border-left"],
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "suppressSizeToFit": True,
        "sortingOrder": ['desc', 'asc'],
        "cellRenderer": "ScoreRenderer"
    },
    {
        "headerName": "Behavior",
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": ["ag-left-aligned-cell", "border-left"],
        "field": "Behavior",
        "width": 120,
        "filter": False,
        "suppressMenu": True,
        "cellRenderer": "BehaviorRenderer",
        "sortable": True,
        "sortingOrder": ['asc', 'desc']
    },
    {
        "field": "8k 🪡",
        "headerName": "8k 🪡",
        "width": 100,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "greaterThanOrEqual",
            "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": ["ag-left-aligned-cell", "border-left"],
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "suppressSizeToFit": True,
        "sortingOrder": ['desc', 'asc']
    },
    create_numeric_column("16k 🪡", width=100, filterParams={
        "defaultOption": "greaterThanOrEqual"
    }),
    # Misc Columns
    {
        "field": "Size",
        "width": 100,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "equals",
            "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": "ag-left-aligned-cell",
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "suppressSizeToFit": True,
        "sortingOrder": ['desc', 'asc'],
    },
    # Hidden copy of the type sort key
    {
        "field": "model_type_sort",
        "hide": True
    },
    {
        "headerName": "Type",
        "field": "model_type_sort",  # sort field directly
        "width": 90,
        "filter": False,
        "suppressMenu": True,
        "cellRenderer": "TypeRenderer",
        "sortable": True,
        "sortingOrder": ['asc', 'desc']
    },
    {
        "headerName": "Settings",
        "field": "Settings",
        "width": 120,
        "filter": False,
        "suppressMenu": True,
        "cellClass": "ag-left-aligned-cell",
    },
    {
        "headerName": "New",
        "field": "New",
        "width": 70,
        "filter": False,
        "suppressMenu": True,
        "cellClass": "ag-left-aligned-cell",
    }
]
# Grid-level options handed to the AgGrid component.
dashGridOptions = {
    "animateRows": True,
    "pagination": False,
    "enableCellTextSelection": True,
    "ensureDomOrder": True,
    "suppressRowClickSelection": True,
    "suppressCellFocus": True,
    # Rows are identified by their display name.
    "getRowId": "params => params.data.Model_Display",
    "pinnedTopRowData": [],
    "suppressMaintainUnsortedOrder": True,
    "suppressMultiSort": True,
    "rowBuffer": 10,
    "maxBlocksInCache": 2,
    "icons": {
        "menu": '<i class="fas fa-search" style="color: var(--text-color)"></i>'
    },
    # This used to be written as
    #   "ag-theme-alpine-dark" if "prefers-color-scheme: dark" else "ag-theme-alpine"
    # but a non-empty string literal is always truthy, so the else branch
    # could never run. A CSS media query cannot be evaluated in Python; the
    # constant below is the value the expression always produced.
    # NOTE(review): confirm the installed grid version accepts a string for
    # this option; dark mode is otherwise handled by the page's CSS.
    "theme": "ag-theme-alpine-dark",
    "columnState": {
        "function": """
function() {
return {
columnVisibility: {}
};
}
"""
    }
}
# Page layout: title, leaderboard grid, then the descriptive notes.
app.layout = html.Div([
    dcc.Store(id='pinned-models-store', data=[]),
    # Title
    html.Div([
        html.H1("🪡 Fey's MNB Leaderboard 🪡",
                className="page-title",
                style={'fontSize': '38px'}),
        html.H2("Multi-Needle & Behavior Evaluation",
                className="page-subtitle"),
    ], style={'marginBottom': '30px'}),
    # Notice
    # html.Div(
    #     html.P(
    #         "None",
    #         style={'textAlign': 'center', 'color': 'red', 'fontSize': '0.9em'}
    #     )
    # ),
    # Grid
    html.Div([
        dag.AgGrid(
            id='leaderboard-grid',
            columnDefs=columnDefs,
            rowData=df.to_dict('records'),
            defaultColDef={
                "sortable": True,
                "resizable": True,
                "filter": "agNumberColumnFilter",
                "floatingFilter": False,
                "sortingOrder": ['desc', 'asc'],
                "filterParams": {
                    # "inRange" is the option key for a between-filter;
                    # "between" is not a recognised key.
                    "defaultOption": "inRange"
                },
                # Default comparator for every column that does not define
                # its own. Two details matter:
                #  * isNaN() is only applied to numbers. Applied to a
                #    non-numeric string it is true, which made every text
                #    value count as "empty" and disabled text sorting.
                #  * The grid negates the result for descending sorts, so the
                #    "empty" results are pre-inverted with isInverted to keep
                #    empty values at the bottom in both directions.
                "comparator": {
                    "function": """
function(valueA, valueB, nodeA, nodeB, isInverted) {
    const isEmptyA = valueA === null || valueA === undefined || valueA === '' || (typeof valueA === 'number' && isNaN(valueA));
    const isEmptyB = valueB === null || valueB === undefined || valueB === '' || (typeof valueB === 'number' && isNaN(valueB));
    // Force empty values to bottom, whatever the sort direction
    if (isEmptyA && !isEmptyB) return isInverted ? -1 : 1;
    if (!isEmptyA && isEmptyB) return isInverted ? 1 : -1;
    if (isEmptyA && isEmptyB) return 0;
    // Normal comparison for non-empty values
    if (typeof valueA === 'number' && typeof valueB === 'number') {
        return valueA - valueB;
    }
    return String(valueA).localeCompare(String(valueB));
}
"""
                }
            },
            dashGridOptions=dashGridOptions,
            dangerously_allow_code=True,
            className="ag-theme-alpine",
            style={"height": "600px", "width": "100%"},
            enableEnterpriseModules=False,
            getRowId="params.data.Model_Display"
        )
    ], style={'marginBottom': '10px'}),
    # Description
    html.Div([
        html.H3("Info", style={'fontSize': '22px', 'marginBottom': '0px'}),
        html.P([html.Strong(""), "This latest reiteration of the leaderboard has finally made it to Hugging Face with extended functionality based on the UGI leaderboard, enjoy!"],
               style={'marginTop': '7px', 'marginBottom': '4px'}),
        html.P([html.Strong("Score:"), " Primarily based on the scoring in the multi-needle test at 8k / 16k context, weighted towards 16k."],
               style={'marginTop': '7px', 'marginBottom': '4px'}),
        html.P([html.Strong("Behavior:"), " Qualitative assessment of the model's behavior during the evaluation. User discretion is advised, as it only has a minor impact on the final score."], style={'marginTop': '7px', 'marginBottom': '4px'}),
        html.P([html.Strong("Difficulty:"), " The current difficulty is only set at a medium level. (Silver Tier 1)"], style={'marginTop': '7px', 'marginBottom': '4px'}),
        html.P([html.Strong("Version:"), " 4.0"], style={'marginTop': '7px', 'marginBottom': '4px'}),
        html.Br(),
        html.Details([
            html.Summary("Recommended Settings",
                         style={
                             'fontWeight': 'bold',
                             'fontSize': '1em',
                             'marginLeft': '0px',
                             'cursor': 'pointer'
                         }),
            html.Ul([
                html.Br(),
                html.Li(["1: Recommended to manually set a RoPE Frequency of 2650000 with Nemo based models when using >8k context.", html.Br(), "LLama.cpp: --rope-freq-base 2650000 (RoPE Base in KoboldCpp)", html.Br(), "EXL2: rope_alpha 2.65"]),
                html.Br(),
                html.Li("2: Recommended to set </s> as an additional stopping token when using these models with ChatML."),
            ], style={'marginTop': '0px', 'marginBottom': '16px', 'marginLeft': '40px'})
        ], style={'marginBottom': '16px'}),
    ], style={
        'maxWidth': '1200px',
        'margin': '0 auto',
        'padding': '0 20px',
        'color': 'var(--text-color)'
    }),
], style={'maxWidth': '100%', 'margin': '0 auto'})
# Register the clientside callback BEFORE starting the server. run_server()
# blocks, so a registration placed after it is never reached when this module
# is executed as a script.
#
# The callback strips rows that are currently pinned to the top out of the
# grid's regular row data, and never pushes a value back through Dash (it
# returns no_update on every path).
# NOTE(review): the Input refers to a component with id 'model-type-filter',
# which is not part of the layout defined in this file — confirm whether that
# control should be restored or the trigger changed.
# NOTE(review): `window.gridApi` is not assigned in this file; presumably it
# is set by client-side assets — verify.
app.clientside_callback(
    """
function(n_clicks) {
if (!window.gridApi) return window.dash_clientside.no_update;
const pinnedRows = window.gridApi.getGridOption('pinnedTopRowData') || [];
if (pinnedRows.length > 0) {
const pinnedIds = new Set(pinnedRows.map(row => row.Model_Display));
const currentRows = [];
window.gridApi.forEachNode(node => {
if (!pinnedIds.has(node.data.Model_Display)) {
currentRows.push(node.data);
}
});
window.gridApi.setGridOption('rowData', currentRows);
}
return window.dash_clientside.no_update;
}
""",
    Output('leaderboard-grid', 'rowData'),
    Input('model-type-filter', 'value')
)

if __name__ == '__main__':
    app.run_server(host='0.0.0.0', port=8050)