Spaces:

allenai
/

reward-bench

Running

natolambert committed on Feb 15, 2024

Commit

b7aaef4

1 Parent(s): 8799e00

add model type

Files changed (2) hide show

app.py CHANGED Viewed

@@ -52,7 +52,7 @@ def avg_over_herm(dataframe):
         new_df[subset] = np.round(np.average(sub_data, axis=1, weights=sub_counts), 2) # take the weighted average
         # new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
-    keep_columns = ["model",] + list(subset_mapping.keys())
     # keep_columns = ["model", "average"] + subsets
     new_df = new_df[keep_columns]
@@ -69,7 +69,7 @@ def length_bias_check(dataframe):
     Then, take the average of the three buckets as "average"
     """
     new_df = dataframe.copy()
-    existing_subsets = new_df.columns[2:]
     final_subsets = ["Length Bias", "Neutral", "Terse Bias"]
     # new data is empty list dict for each final subset
     new_data = {s: [] for s in final_subsets}
@@ -105,8 +105,8 @@ herm_data_length = length_bias_check(herm_data).sort_values(by='Terse Bias', asc
 prefs_data = load_all_data(repo_dir_herm, subdir="pref-sets").sort_values(by='average', ascending=False)
 # prefs_data_sub = expand_subsets(prefs_data).sort_values(by='average', ascending=False)
-col_types_herm = ["markdown"] + ["number"] * (len(herm_data.columns) - 1)
-col_types_herm_avg = ["markdown"] + ["number"] * (len(herm_data_avg.columns) - 1)
 cols_herm_data_length = ["markdown"] + ["number"] * (len(herm_data_length.columns) - 1)
 col_types_prefs = ["markdown"] + ["number"] * (len(prefs_data.columns) - 1)
 # col_types_prefs_sub = ["markdown"] + ["number"] * (len(prefs_data_sub.columns) - 1)

         new_df[subset] = np.round(np.average(sub_data, axis=1, weights=sub_counts), 2) # take the weighted average
         # new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
+    keep_columns = ["model",] + ["model_type"] + list(subset_mapping.keys())
     # keep_columns = ["model", "average"] + subsets
     new_df = new_df[keep_columns]
     Then, take the average of the three buckets as "average"
     """
     new_df = dataframe.copy()
+    existing_subsets = new_df.columns[3:] # model, model_type, average
     final_subsets = ["Length Bias", "Neutral", "Terse Bias"]
     # new data is empty list dict for each final subset
     new_data = {s: [] for s in final_subsets}
 prefs_data = load_all_data(repo_dir_herm, subdir="pref-sets").sort_values(by='average', ascending=False)
 # prefs_data_sub = expand_subsets(prefs_data).sort_values(by='average', ascending=False)
+col_types_herm = ["markdown", "str"] + ["number"] * (len(herm_data.columns) - 2)  # model (markdown) + model_type (str) precede the numeric columns
+col_types_herm_avg = ["markdown", "str"] + ["number"] * (len(herm_data_avg.columns) - 2)  # same two leading non-numeric columns
 cols_herm_data_length = ["markdown"] + ["number"] * (len(herm_data_length.columns) - 1)
 col_types_prefs = ["markdown"] + ["number"] * (len(prefs_data.columns) - 1)
 # col_types_prefs_sub = ["markdown"] + ["number"] * (len(prefs_data_sub.columns) - 1)

src/utils.py CHANGED Viewed

@@ -61,6 +61,9 @@ def load_all_data(data_repo, subdir:str, subsubsets=False):    # use HF api to p
     # select all columns except "model"
     cols = df.columns.tolist()
     cols.remove("model")
     # remove model_beaker from dataframe
     if "model_beaker" in cols:
         cols.remove("model_beaker")
@@ -80,6 +83,12 @@ def load_all_data(data_repo, subdir:str, subsubsets=False):    # use HF api to p
     cols.insert(1, cols.pop(cols.index('average')))
     df = df.loc[:, cols]
     # remove column xstest (outdated data)
     # if xstest is a column
     if "xstest" in df.columns:

     # select all columns except "model"
     cols = df.columns.tolist()
     cols.remove("model")
+    # if model_type is a column (pref tests may not have it)
+    if "model_type" in cols:
+        cols.remove("model_type")
     # remove model_beaker from dataframe
     if "model_beaker" in cols:
         cols.remove("model_beaker")
     cols.insert(1, cols.pop(cols.index('average')))
     df = df.loc[:, cols]
+    # move model_type column to the second position (index 1, right after "model")
+    if "model_type" in cols:
+        cols = list(df.columns)
+        cols.insert(1, cols.pop(cols.index('model_type')))
+        df = df.loc[:, cols]
     # remove column xstest (outdated data)
     # if xstest is a column
     if "xstest" in df.columns: