EMMA_leaderboard

Runtime error

App Files Files Community

Lakoc committed on Jun 5

Commit

034cbdf

1 Parent(s): 7da0d34

missing server script

Browse files

Files changed (1) hide show

leaderboard_server.py +92 -0

leaderboard_server.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import json
+import os
+import meeteval.io
+import pandas as pd
+from utils import calc_wer, aggregate_wer_metrics
+from txt_norm import get_text_norm
# Constants
REFERENCE_BASE_PATH = "./references"  # e.g. ./references/single_channel/dataset1.json
TASKS_METADATA_PATH = "./tasks_metadata.json"


class LeaderboardServer:
    """File-backed leaderboard that scores submitted hypotheses.

    All state lives in a ``results.json`` file inside the local
    ``submissions`` directory. Each entry maps a model id to the submitter
    name and the per-dataset metrics produced by ``calc_wer`` /
    ``aggregate_wer_metrics``.
    """

    def __init__(self):
        """Create the submissions directory and index existing results."""
        self.local_leaderboard = os.path.abspath("submissions")
        # NOTE: the attribute name is misspelled (three "s"). It is kept
        # unchanged because code outside this class may already read it.
        self.submisssion_id_to_file = {}  # Maps model_id to filepath
        # Use a context manager so the metadata file handle is closed.
        with open(TASKS_METADATA_PATH, encoding="utf-8") as f:
            self.tasks_metadata = json.load(f)["tasks"]
        self.submission_ids = set()
        self.results_file = os.path.join(self.local_leaderboard, "results.json")
        os.makedirs(self.local_leaderboard, exist_ok=True)
        self.fetch_existing_models()
        self.text_normalizer = get_text_norm("whisper_nsf")

    def _load_results(self):
        """Return the parsed results file, or an empty dict if it is missing."""
        try:
            with open(self.results_file, encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return {}

    def _normalize_words(self, seg):
        """Return a copy of ``seg`` with its ``words`` field text-normalised."""
        return {**seg, "words": self.text_normalizer(seg["words"])}

    def _evaluate_dataset(self, hyp_seglst, task, dataset):
        """Score the hypothesis segments that belong to one reference dataset.

        Args:
            hyp_seglst: Hypothesis segments as loaded by ``meeteval.io.load``.
            task: Sub-directory of ``REFERENCE_BASE_PATH`` holding the reference.
            dataset: Reference file name without the ``.json`` extension.

        Returns:
            Whatever ``aggregate_wer_metrics`` returns for ``["tcp_wer"]``.
        """
        ref_path = os.path.join(REFERENCE_BASE_PATH, task, f"{dataset}.json")
        ref_seglst = meeteval.io.load(ref_path)
        # Only score hypothesis segments whose session exists in the reference.
        sessions = ref_seglst.unique('session_id')
        local_hyps = hyp_seglst.filter(lambda seg: seg['session_id'] in sessions)
        ref_seglst = ref_seglst.map(self._normalize_words)
        local_hyps = local_hyps.map(self._normalize_words)
        # NOTE(review): collar=5 is presumably in seconds - confirm against calc_wer.
        per_session_wers = calc_wer(
            tcp_hyp_seglst=local_hyps,
            ref_seglst=ref_seglst,
            collar=5,
            metrics_list=["tcp_wer"],
        )
        return aggregate_wer_metrics(per_session_wers, ["tcp_wer"])

    def fetch_existing_models(self):
        """Rebuild the in-memory submission index from the results file."""
        self.submisssion_id_to_file.clear()
        self.submission_ids.clear()
        for model_id in self._load_results():
            self.submission_ids.add(model_id)
            hyp_path = os.path.join(self.local_leaderboard, f"{model_id}_hyp.json")
            self.submisssion_id_to_file[model_id] = hyp_path

    def prepare_model_for_submission(self, file, metadata, task, datasets):
        """Evaluate a submission and store its metrics in the results file.

        Args:
            file: Hypothesis file, passed straight to ``meeteval.io.load``.
            metadata: Mapping with the keys ``"submitted_by"`` and ``"model_id"``.
            task: Task name, used as the reference sub-directory.
            datasets: Iterable of dataset names to evaluate against.

        Raises:
            KeyError: If ``metadata`` lacks ``"submitted_by"`` or ``"model_id"``.

        An existing entry with the same model id is overwritten. The in-memory
        index is not refreshed here; call ``update_leaderboard`` afterwards.
        """
        submitted_by = metadata["submitted_by"]
        model_id = metadata["model_id"]

        # Run WER eval
        hyp_seglst = meeteval.io.load(file)
        results = {
            dataset: self._evaluate_dataset(hyp_seglst, task, dataset)
            for dataset in datasets
        }

        # Update results file
        all_results = self._load_results()
        all_results[model_id] = {
            "submitted_by": submitted_by,
            "results": results,
        }
        # Serialise before opening for writing: opening with "w" truncates the
        # file, so a serialisation error must not wipe the stored results.
        serialized = json.dumps(all_results, indent=2)
        with open(self.results_file, "w", encoding="utf-8") as f:
            f.write(serialized)

    def update_leaderboard(self):
        """Refresh the in-memory submission index from disk."""
        self.fetch_existing_models()

    def get_leaderboard(self) -> pd.DataFrame:
        """Return the leaderboard as a table with one row per model.

        Returns:
            A frame with the columns ``"Model ID"`` and ``"Submitted by"``, plus
            one column per dataset holding that dataset's ``"tcp_wer"`` value
            (``None`` when absent). If there are no results, either because the
            file is missing or because it is empty, a frame with the single
            placeholder column ``"No submissions yet"`` is returned.
        """
        results = self._load_results()
        if not results:
            return pd.DataFrame(columns=["No submissions yet"])
        rows = []
        for model_id, content in results.items():
            row = {"Model ID": model_id, "Submitted by": content["submitted_by"]}
            for dataset, metrics in content["results"].items():
                row[dataset] = metrics.get("tcp_wer")
            rows.append(row)
        return pd.DataFrame(rows)