import os
from glob import glob

import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, recall_score
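# NOTE (inferred from the functions below, not from any upstream spec): each result
# file is expected to be a JSON-lines file whose records include at least the
# columns 'label', 'type', 'algorithm' and, for the duration breakdown, 'duration'.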
def get_merged_df(results_path, skip_samples=True):
    """Load every result JSON-lines file under results_path into a single DataFrame."""
    results = glob(os.path.join(results_path, "*.json"))
    dfs = []
    for r in results:
        # Optionally ignore the small sample-result files.
        if skip_samples and 'sample_result' in r:
            continue
        df = pd.read_json(r, lines=True)
        # Skip files with missing values ('algorithm' is excluded from the check
        # because it can legitimately be empty, e.g. for real samples).
        if df.drop(['algorithm'], axis=1).isna().values.any():
            print(f"Missing values in {r}")
        else:
            dfs.append(df)
    full_df = pd.concat(dfs)
    return full_df
def map_df(full_df):
    """Add binary ground-truth and prediction columns to the merged results."""
    # Any degree of manipulation counts as fake (positive class).
    gnd_truth_mapping = {'full fake': 1,
                         'half fake': 1,
                         'mostly fake': 1,
                         'real': 0}
    pred_mapping = {'fake': 1, 'real': 0}
    full_df['gnd_truth'] = full_df['label'].map(gnd_truth_mapping)
    full_df['pred'] = full_df['type'].map(pred_mapping)
    return full_df
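# Example (hypothetical values): a row with label='half fake' and type='real'
# maps to gnd_truth=1, pred=0, i.e. a missed detection.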
def get_duration_scores(df):
    """Accuracy on real samples only (i.e. specificity), bucketed by clip duration in seconds."""
    columns = ['Under 26 s', '55 s', '125 s', 'Overall']
    samples_tested = []
    acc_scores = []
    for c in columns:
        if c == 'Overall':
            mask = df.gnd_truth == 0
        elif c == 'Under 26 s':
            mask = (df.gnd_truth == 0) & (df.duration < 26)
        elif c == '55 s':
            mask = (df.gnd_truth == 0) & (df.duration >= 26) & (df.duration < 56)
        elif c == '125 s':
            mask = (df.gnd_truth == 0) & (df.duration >= 56) & (df.duration < 126)
        else:
            raise ValueError(f"Unknown duration bucket: {c}")
        sel_df = df[mask]
        samples_tested.append(len(sel_df))
        acc_scores.append(round(accuracy_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))
    lb = pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Accuracy": acc_scores})
    return lb
def get_algorithm_scores_v1(df):
    """Recall (detection rate) per generation algorithm."""
    columns = list(df[df.label != 'real'].algorithm.unique())
    samples_tested = []
    rec_scores = []
    for c in columns:
        # Real samples are included in the mask, but recall only depends on the positives.
        mask = (df.algorithm == c) | (df.label == 'real')
        sel_df = df[mask]
        samples_tested.append(len(sel_df[sel_df.label != 'real']))
        rec_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))
    lb = pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Recall": rec_scores})
    return lb
def get_algorithm_scores_v2(df):
    """TPR per algorithm and per fake-label category, plus TNR and macro F1 for the overall row."""
    columns = list(df[df.label != 'real'].algorithm.unique())   # generation algorithms
    columns2 = list(df[df.label != 'real'].label.unique())      # fake-label categories
    samples_tested = []
    tpr_scores = []
    # TNR and F1 are only reported on the final "overall" row; pad the per-group rows with NaN.
    tnr_scores = [float('nan')] * (len(columns) + len(columns2))
    f1_scores = [float('nan')] * (len(columns) + len(columns2))
    # TPR per generation algorithm.
    for c in columns:
        sel_df = df[df.algorithm == c]
        samples_tested.append(len(sel_df))
        tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))
    # TPR per fake-label category (full / mostly / half fake).
    for c in columns2:
        sel_df = df[df.label == c]
        samples_tested.append(len(sel_df))
        tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))
    # Overall row: TPR over all fake samples, TNR over all real samples, macro F1 over everything.
    sel_df = df[df.label != "real"]
    tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))
    sel_df = df[df.label == "real"]
    tnr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=0), 3))
    samples_tested.append(len(df))
    f1_scores.append(round(f1_score(df.gnd_truth.values, df.pred.values, average="macro"), 3))
    lb = pd.DataFrame({"Sample": columns + columns2 + ["overall"], "Num Samples": samples_tested,
                       "TPR": tpr_scores, "TNR": tnr_scores, "F1": f1_scores})
    return lb
def build_leaderboard(results_path='results'):
    """Build the leaderboard table from the JSON result files under results_path."""
    full_df = get_merged_df(results_path)
    full_df_mapped = map_df(full_df)
    leaderboard = get_algorithm_scores_v2(full_df_mapped)
    return leaderboard
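# Minimal usage sketch (assumes a local 'results/' directory of JSON-lines files with
# the columns noted above; the path and the printing are illustrative, not part of the app):
if __name__ == "__main__":
    lb = build_leaderboard('results')
    print(lb.to_string(index=False))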