Spaces:
Running
Running
Try analyze winscore with bokeh
Browse files- analyze_winscore.py +181 -0
- app.py +4 -8
- server.py +37 -0
analyze_winscore.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
|
| 3 |
+
import csv
|
| 4 |
+
import random
|
| 5 |
+
import numpy as np
|
| 6 |
+
from bokeh.plotting import figure
|
| 7 |
+
from bokeh.models import LabelSet, LogScale
|
| 8 |
+
from bokeh.palettes import Turbo256 # A color palette with enough colors
|
| 9 |
+
from bokeh.models import ColumnDataSource
|
| 10 |
+
|
| 11 |
+
def fit_curve(x, y, degree=1):
    """Fit a polynomial of the given degree to (x, y) and sample it.

    Args:
        x: x coordinates of the data points.
        y: y coordinates of the data points, aligned with *x*.
        degree: degree of the polynomial to fit (default: straight line).

    Returns:
        (x_fit, y_fit): 100 evenly spaced x values spanning [min(x), max(x)]
        and the fitted polynomial evaluated at those points.
    """
    model = np.poly1d(np.polyfit(x, y, degree))
    sample_xs = np.linspace(min(x), max(x), 100)
    return sample_xs, model(sample_xs)
|
| 19 |
+
|
| 20 |
+
def remove_outliers(x, y):
    """Split (x, y) points into inliers and outliers using the IQR rule.

    A point is kept only if BOTH of its coordinates lie within
    [Q1 - 1.5*IQR, Q3 + 1.5*IQR] of the respective axis.

    Args:
        x: x coordinates (array-like).
        y: y coordinates (array-like), aligned with *x*.

    Returns:
        (x_in, y_in, x_out, y_out): numpy arrays of inlier and outlier
        coordinates, in the original order.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    def _within_fences(values):
        # Tukey's fences: 1.5 * IQR beyond the quartiles.
        q1, q3 = np.percentile(values, [25, 75])
        margin = 1.5 * (q3 - q1)
        return (values >= q1 - margin) & (values <= q3 + margin)

    keep = _within_fences(x) & _within_fences(y)
    return x[keep], y[keep], x[~keep], y[~keep]
|
| 44 |
+
|
| 45 |
+
def get_ldb_records(name_map, csv_file_path):
    """Load leaderboard rows from a CSV file, keyed by model title.

    Rows for "Qwen/Qwen2.5*" models are skipped. A row whose Model is not
    one of the titles in *name_map* raises KeyError, preserving the
    original strict-sanitisation contract (previously enforced by an
    identity mapping dict).

    Args:
        name_map: mapping of submission ids to display model titles; only
            its values are used, as the set of accepted model titles.
        csv_file_path: path to the leaderboard CSV (must have a 'Model' column).

    Returns:
        dict mapping model title -> CSV row (as a dict from csv.DictReader).

    Raises:
        KeyError: if a non-skipped row's Model is not in name_map's values.
    """
    known_titles = set(name_map.values())

    ldb_records = {}
    # newline='' is the csv-module-documented way to open CSV files; the
    # explicit encoding matters because columns include non-ASCII names
    # ('# θ (B)', 'Average ⬆️').
    with open(csv_file_path, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        for row in reader:
            model = row['Model']
            # This model family is deliberately excluded from the plot.
            if model.startswith("Qwen/Qwen2.5"):
                continue
            if model not in known_titles:
                # Keep the failure explicit rather than silently dropping
                # unexpected rows (matches the original KeyError behavior).
                raise KeyError(model)
            ldb_records[model] = row

    return ldb_records
|
| 58 |
+
|
| 59 |
+
def create_scatter_plot_with_curve_with_variances_named(category, variance_across_categories, x, y, sizes, model_names, ldb_records):
    """Build a bokeh scatter plot of model performance versus model size.

    Inliers and outliers (IQR rule via remove_outliers) are drawn as two
    separate glyph sets. Marker size encodes the variance of each model's
    per-category scores, marker shape encodes the model Type ('chat' ->
    circle, anything else -> triangle), per-point colors come from a
    shuffled Turbo256 sample, and a degree-1 trend line is fitted through
    the inliers only. The x axis uses a log scale with fixed ticks.

    Args:
        category: text used as the y-axis label.
        variance_across_categories: dict model name -> score variance.
        x: x coordinates (model sizes), aligned with model_names.
        y: y coordinates (performance scores), aligned with x.
        sizes: model sizes in billions of parameters (tooltip only).
        model_names: model names aligned with x/y.
        ldb_records: dict model name -> leaderboard row; must have 'Type'.

    Returns:
        A bokeh figure ready to embed.

    NOTE(review): the membership tests below use np.in1d(x, x_filtered),
    which assumes x values are unique per model — duplicate sizes would
    select the wrong model names/variances. Confirm against the caller.
    """
    FONTSIZE = 10

    # Split the data into inliers and outliers (IQR rule on both axes).
    x_filtered, y_filtered, x_outliers, y_outliers = remove_outliers(x, y)

    # Marker-size range that variances are linearly mapped onto.
    min_marker_size = 5
    max_marker_size = 30

    def scale_variance_to_size(variance):
        # Linear map: [min variance, max variance] -> [min, max marker size].
        # NOTE(review): divides by zero if all variances are equal — confirm
        # upstream guarantees at least two distinct variance values.
        return min_marker_size + (variance - min(variance_across_categories.values())) * (max_marker_size - min_marker_size) / (max(variance_across_categories.values()) - min(variance_across_categories.values()))

    # Variance lookup for a model name; unknown models fall back to 0.
    def get_variance_for_model(model_name):
        print(model_name)  # debug trace, left in place
        return variance_across_categories.get(model_name, 0)  # Default to 0 if model not found

    # Variances and derived marker sizes for the inlier points.
    filtered_variances = [get_variance_for_model(mname) for mname in np.array(model_names)[np.in1d(x, x_filtered)]]
    marker_sizes_filtered = [scale_variance_to_size(var) for var in filtered_variances]

    # Variances and derived marker sizes for the outlier points.
    outlier_variances = [get_variance_for_model(mname) for mname in np.array(model_names)[np.in1d(x, x_outliers)]]
    marker_sizes_outliers = [scale_variance_to_size(var) for var in outlier_variances]

    # Symbol per inlier point: circle for 'chat' models, triangle otherwise
    # (deterministic by Type, despite the original "random" naming).
    filtered_symbols = ['circle' if ldb_records[mname]['Type'] == 'chat' else 'triangle' for mname in np.array(model_names)[np.in1d(x, x_filtered)]]

    # Symbol per outlier point, same rule.
    outlier_symbols = ['circle' if ldb_records[mname]['Type'] == 'chat' else 'triangle' for mname in np.array(model_names)[np.in1d(x, x_outliers)]]

    # Sample Turbo256 evenly so there is roughly one distinct color per
    # model, then shuffle so neighbouring points get unrelated hues.
    stride = len(Turbo256) // len(model_names)
    color_palette = list(Turbo256[::stride])
    random.shuffle(color_palette)

    # One color per inlier point.
    filtered_colors = [color_palette[i % len(color_palette)] for i in range(len(x_filtered))]

    # Outlier colors continue where the inlier colors stopped.
    outlier_colors = [color_palette[(i + len(x_filtered)) % len(color_palette)] for i in range(len(x_outliers))]

    # Bokeh data source for the inlier points.
    source_filtered = ColumnDataSource(data={
        'x': x_filtered,
        'y': y_filtered,
        'sizes': np.array(sizes)[np.in1d(x, x_filtered)],  # original model sizes (tooltip)
        'marker_sizes': marker_sizes_filtered,  # marker sizes derived from variance
        'model_names': np.array(model_names)[np.in1d(x, x_filtered)],
        'variance': filtered_variances,  # shown in the tooltip
        'color': filtered_colors,
        'symbol': filtered_symbols
    })

    # Bokeh data source for the outlier points.
    source_outliers = ColumnDataSource(data={
        'x': x_outliers,
        'y': y_outliers,
        'sizes': np.array(sizes)[np.in1d(x, x_outliers)],  # original model sizes (tooltip)
        'marker_sizes': marker_sizes_outliers,  # marker sizes derived from variance
        'model_names': np.array(model_names)[np.in1d(x, x_outliers)],
        'variance': outlier_variances,  # shown in the tooltip
        'color': outlier_colors,
        'symbol': outlier_symbols
    })

    # Figure with hover tooltips; width/height/title/tools left at defaults.
    p = figure(#width=900, height=800, #title=f"{category} vs Model Size vs Variance Across Categories",
               #tools="pan,wheel_zoom,box_zoom,reset,save",
               tooltips=[("Model", "@model_names"),
                         ("Model Size (B parameters)", "@sizes"),
                         ("Variance", "@variance"),  # variance shown on hover
                         ("Performance", "@y")])

    # Inlier points: per-point color, symbol, and variance-scaled size.
    p.scatter('x', 'y', size='marker_sizes', source=source_filtered, fill_alpha=0.6, color='color', marker='symbol')

    # Outlier points, drawn the same way.
    p.scatter('x', 'y', size='marker_sizes', source=source_outliers, fill_alpha=0.6, color='color', marker='symbol')

    # Dashed trend line fitted through the inliers only.
    x_fit, y_fit = fit_curve(x_filtered, y_filtered, degree=1)

    p.line(x_fit, y_fit, line_color='gray', line_width=2, line_dash='dashed')

    # Model-name labels, slightly offset to reduce overlap with the markers.
    p.add_layout(LabelSet(x='x', y='y', text='model_names', source=source_filtered,
                          x_offset=5, y_offset=8, text_font_size=f"{FONTSIZE-4}pt", text_color='black'))

    p.add_layout(LabelSet(x='x', y='y', text='model_names', source=source_outliers,
                          x_offset=5, y_offset=8, text_font_size=f"{FONTSIZE-4}pt", text_color='black'))

    # Axis labels.
    p.xaxis.axis_label = 'Model Size (B parameters)'
    p.yaxis.axis_label = f'{category}'

    # Axis label font sizes.
    p.xaxis.axis_label_text_font_size = f"{FONTSIZE}pt"
    p.yaxis.axis_label_text_font_size = f"{FONTSIZE}pt"

    # Tick label font sizes.
    p.xaxis.major_label_text_font_size = f"{FONTSIZE}pt"
    p.yaxis.major_label_text_font_size = f"{FONTSIZE}pt"

    #p.x_range.start = 1
    #p.x_range.end = 18

    #p.y_range.end = 60

    # Log scale on the x axis.
    p.x_scale = LogScale()

    # Explicit x-axis tick positions.
    p.xaxis.ticker = [1,2,4,7,12,15]
    p.xaxis.axis_label_text_font_style = "normal"
    p.yaxis.axis_label_text_font_style = "normal"

    return p
|
| 180 |
+
|
| 181 |
+
# EOF
|
app.py
CHANGED
|
@@ -6,8 +6,6 @@ import gradio as gr
|
|
| 6 |
from gradio.themes.utils.sizes import text_md
|
| 7 |
from gradio_modal import Modal
|
| 8 |
|
| 9 |
-
from bokeh.plotting import figure
|
| 10 |
-
|
| 11 |
from content import (
|
| 12 |
HEADER_MARKDOWN,
|
| 13 |
LEADERBOARD_TAB_TITLE_MARKDOWN,
|
|
@@ -628,12 +626,10 @@ def gradio_app():
|
|
| 628 |
gr.Markdown(LEADERBOARD_TAB_TITLE_MARKDOWN)
|
| 629 |
|
| 630 |
with gr.Row():
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
fig.circle(x, y0, size=10, color="navy", alpha=0.5)
|
| 636 |
-
p1 = gr.Plot(value=fig, label='Plot 1')
|
| 637 |
|
| 638 |
with gr.Row():
|
| 639 |
leaderboard_category_of_tasks = gr.Dropdown(
|
|
|
|
| 6 |
from gradio.themes.utils.sizes import text_md
|
| 7 |
from gradio_modal import Modal
|
| 8 |
|
|
|
|
|
|
|
| 9 |
from content import (
|
| 10 |
HEADER_MARKDOWN,
|
| 11 |
LEADERBOARD_TAB_TITLE_MARKDOWN,
|
|
|
|
| 626 |
gr.Markdown(LEADERBOARD_TAB_TITLE_MARKDOWN)
|
| 627 |
|
| 628 |
with gr.Row():
|
| 629 |
+
gr.Plot(
|
| 630 |
+
value=leaderboard_server.get_bokeh_figure(),
|
| 631 |
+
label='Foo',
|
| 632 |
+
)
|
|
|
|
|
|
|
| 633 |
|
| 634 |
with gr.Row():
|
| 635 |
leaderboard_category_of_tasks = gr.Dropdown(
|
server.py
CHANGED
|
@@ -622,6 +622,43 @@ class LeaderboardServer:
|
|
| 622 |
dataframe.to_csv(filepath, index=False)
|
| 623 |
return filepath
|
| 624 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 625 |
def get_leaderboard_csv(self, pre_submit=None, category=None):
|
| 626 |
if pre_submit == None:
|
| 627 |
category = category if category else self.TASKS_CATEGORY_OVERALL
|
|
|
|
| 622 |
dataframe.to_csv(filepath, index=False)
|
| 623 |
return filepath
|
| 624 |
|
| 625 |
+
def get_bokeh_figure(self):
|
| 626 |
+
import numpy as np
|
| 627 |
+
from analyze_winscore import get_ldb_records, create_scatter_plot_with_curve_with_variances_named
|
| 628 |
+
|
| 629 |
+
#m = self.TASKS_METADATA
|
| 630 |
+
#tournament = self.tournament_results
|
| 631 |
+
name_map = self.submission_id_to_model_title
|
| 632 |
+
|
| 633 |
+
category = self.TASKS_CATEGORY_OVERALL
|
| 634 |
+
csv_file_path = self.leaderboard_dataframes_csv[category]
|
| 635 |
+
ldb_records = get_ldb_records(name_map, csv_file_path)
|
| 636 |
+
categories = self.TASKS_CATEGORIES
|
| 637 |
+
model_names = list(ldb_records.keys())
|
| 638 |
+
sizes = [float(ldb_records[model]['# θ (B)']) for model in model_names]
|
| 639 |
+
average_performance = [float(ldb_records[model]['Average ⬆️']) for model in model_names]
|
| 640 |
+
|
| 641 |
+
variances={}
|
| 642 |
+
for model, record in ldb_records.items():
|
| 643 |
+
r = [float(record[cat]) for cat in categories]
|
| 644 |
+
variances[model] = np.var(r)
|
| 645 |
+
|
| 646 |
+
print(variances)
|
| 647 |
+
print(min(variances.values()))
|
| 648 |
+
variance_across_categories = variances
|
| 649 |
+
|
| 650 |
+
fig = create_scatter_plot_with_curve_with_variances_named(
|
| 651 |
+
'Overall Duel Win Score',
|
| 652 |
+
variance_across_categories,
|
| 653 |
+
sizes,
|
| 654 |
+
average_performance,
|
| 655 |
+
sizes,
|
| 656 |
+
model_names,
|
| 657 |
+
ldb_records,
|
| 658 |
+
)
|
| 659 |
+
|
| 660 |
+
return fig
|
| 661 |
+
|
| 662 |
def get_leaderboard_csv(self, pre_submit=None, category=None):
|
| 663 |
if pre_submit == None:
|
| 664 |
category = category if category else self.TASKS_CATEGORY_OVERALL
|