Spaces:
Runtime error
Runtime error
Lakoc
committed on
Commit
·
034cbdf
1
Parent(s):
7da0d34
missing server script
Browse files- leaderboard_server.py +92 -0
leaderboard_server.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
import meeteval.io
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from utils import calc_wer, aggregate_wer_metrics
|
| 7 |
+
from txt_norm import get_text_norm
|
| 8 |
+
|
| 9 |
+
# Constants
|
| 10 |
+
# Base directory of the reference transcripts, laid out as <task>/<dataset>.json.
REFERENCE_BASE_PATH = "./references"  # e.g. ./references/single_channel/dataset1.json
|
| 11 |
+
# JSON file with task metadata; the server reads its top-level "tasks" entry.
TASKS_METADATA_PATH = "./tasks_metadata.json"
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class LeaderboardServer:
    """File-backed leaderboard that scores submissions with the ``tcp_wer`` metric.

    All state lives in the local ``submissions`` directory. Its
    ``results.json`` maps every model id to
    ``{"submitted_by": ..., "results": {dataset: metrics}}``.
    """

    def __init__(self):
        """Load task metadata, create the storage directory and index stored results."""
        self.local_leaderboard = os.path.abspath("submissions")
        # NOTE: the attribute name is misspelled (three "s"); it is kept as is
        # because code outside this class may read it.
        self.submisssion_id_to_file = {}  # Maps model_id to filepath
        # Context manager so the metadata file handle is closed immediately.
        with open("tasks_metadata.json", encoding="utf-8") as f:
            self.tasks_metadata = json.load(f)["tasks"]
        self.submission_ids = set()
        self.results_file = os.path.join(self.local_leaderboard, "results.json")
        os.makedirs(self.local_leaderboard, exist_ok=True)
        self.fetch_existing_models()
        self.text_normalizer = get_text_norm("whisper_nsf")

    def _load_results(self):
        """Return the parsed content of ``results.json``, or ``{}`` if it does not exist."""
        results_path = os.path.join(self.local_leaderboard, "results.json")
        try:
            with open(results_path, encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return {}

    def _save_results(self, all_results):
        """Write ``all_results`` to ``results.json`` without risking the old content.

        The payload is serialized *before* any file is opened, so a value that
        is not JSON-serializable raises ``TypeError`` while the existing file is
        still untouched. The text is then written to a sibling temp file and
        moved into place with ``os.replace``.
        """
        results_path = os.path.join(self.local_leaderboard, "results.json")
        payload = json.dumps(all_results, indent=2)
        tmp_path = f"{results_path}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(payload)
        os.replace(tmp_path, results_path)

    def fetch_existing_models(self):
        """Rebuild the in-memory submission index from ``results.json``.

        Both ``submission_ids`` and ``submisssion_id_to_file`` are cleared
        first; they stay empty when no results file exists.
        """
        self.submisssion_id_to_file.clear()
        self.submission_ids.clear()

        for model_id in self._load_results():
            self.submission_ids.add(model_id)
            self.submisssion_id_to_file[model_id] = os.path.join(
                self.local_leaderboard, f"{model_id}_hyp.json"
            )

    def prepare_model_for_submission(self, file, metadata, task, datasets):
        """Score a hypothesis file and store the metrics under its model id.

        Args:
            file: Hypothesis source handed to ``meeteval.io.load``.
            metadata: Mapping with the keys ``"submitted_by"`` and ``"model_id"``.
            task: Sub-directory of ``references`` holding the reference files.
            datasets: Iterable of dataset names; each one is resolved to
                ``references/<task>/<dataset>.json``.

        Raises:
            KeyError: If ``metadata`` lacks one of the required keys.

        An existing entry with the same model id is overwritten.
        """
        submitted_by = metadata["submitted_by"]
        model_id = metadata["model_id"]

        # NOTE(review): nothing here writes "<model_id>_hyp.json", although
        # fetch_existing_models maps ids to that path - confirm the hypothesis
        # file is stored elsewhere.

        # Run WER eval
        normalize = self.text_normalizer
        results = {}
        hyp_seglst = meeteval.io.load(file)

        for dataset in datasets:
            ref_path = os.path.join("references", task, f"{dataset}.json")
            ref_seglst = meeteval.io.load(ref_path)
            # Only hypothesis segments of sessions present in this reference
            # set take part in the evaluation.
            sessions = ref_seglst.unique("session_id")
            local_hyps = hyp_seglst.filter(lambda seg: seg["session_id"] in sessions)
            # Apply the same text normalization to both sides before scoring.
            ref_seglst = ref_seglst.map(
                lambda seg: {**seg, "words": normalize(seg["words"])}
            )
            local_hyps = local_hyps.map(
                lambda seg: {**seg, "words": normalize(seg["words"])}
            )
            per_session_wers = calc_wer(
                tcp_hyp_seglst=local_hyps,
                ref_seglst=ref_seglst,
                collar=5,
                metrics_list=["tcp_wer"],
            )
            results[dataset] = aggregate_wer_metrics(per_session_wers, ["tcp_wer"])

        # Update results file
        all_results = self._load_results()
        all_results[model_id] = {
            "submitted_by": submitted_by,
            "results": results,
        }
        self._save_results(all_results)

    def update_leaderboard(self):
        """Refresh the in-memory submission index from disk."""
        self.fetch_existing_models()

    def get_leaderboard(self):
        """Return the leaderboard as a ``pandas.DataFrame``.

        Each row holds ``"Model ID"``, ``"Submitted by"`` and one column per
        dataset containing that dataset's ``tcp_wer`` (``None`` when the metric
        is absent). When there are no submissions - results file missing or
        empty - a frame with the single column ``"No submissions yet"`` is
        returned.
        """
        results = self._load_results()
        if not results:
            return pd.DataFrame(columns=["No submissions yet"])

        rows = []
        for model_id, content in results.items():
            row = {"Model ID": model_id, "Submitted by": content["submitted_by"]}
            for dataset, metrics in content["results"].items():
                row[dataset] = metrics.get("tcp_wer")
            rows.append(row)

        return pd.DataFrame(rows)
|