Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	
		Lakoc
		
	committed on
		
		
					Commit 
							
							·
						
						034cbdf
	
1
								Parent(s):
							
							7da0d34
								
missing server script
Browse files- leaderboard_server.py +92 -0
    	
        leaderboard_server.py
    ADDED
    
    | @@ -0,0 +1,92 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import json
         | 
| 2 | 
            +
            import os
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            import meeteval.io
         | 
| 5 | 
            +
            import pandas as pd
         | 
| 6 | 
            +
            from utils import calc_wer, aggregate_wer_metrics
         | 
| 7 | 
            +
            from txt_norm import get_text_norm
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # Constants
         | 
| 10 | 
            +
            REFERENCE_BASE_PATH = "./references"  # e.g. ./references/single_channel/dataset1.json
         | 
| 11 | 
            +
            TASKS_METADATA_PATH = "./tasks_metadata.json"
         | 
| 12 | 
            +
             | 
| 13 | 
            +
             | 
| 14 | 
            +
            class LeaderboardServer:
                """Keep a JSON-file-backed leaderboard of tcp_wer results for submitted models.

                All state is stored in ``<local_leaderboard>/results.json`` as a mapping
                ``model_id -> {"submitted_by": ..., "results": {dataset: metrics}}``.
                """

                def __init__(self):
                    """Load task metadata, ensure the submissions directory exists and index it."""
                    self.local_leaderboard = os.path.abspath("submissions")
                    # NOTE: the attribute name is misspelled ("submisssion") but is kept as-is
                    # because code outside this class may already read it.
                    self.submisssion_id_to_file = {}  # Maps model_id to hypothesis filepath
                    # Use a context manager so the metadata file handle is closed deterministically.
                    with open("tasks_metadata.json", encoding="utf-8") as f:
                        self.tasks_metadata = json.load(f)["tasks"]
                    self.submission_ids = set()
                    self.results_file = os.path.join(self.local_leaderboard, "results.json")
                    os.makedirs(self.local_leaderboard, exist_ok=True)
                    self.fetch_existing_models()
                    self.text_normalizer = get_text_norm("whisper_nsf")

                def _results_path(self) -> str:
                    """Return the path of results.json inside the current leaderboard directory."""
                    return os.path.join(self.local_leaderboard, "results.json")

                def _load_results(self) -> dict:
                    """Return the stored results mapping, or an empty dict if no file exists yet."""
                    try:
                        with open(self._results_path(), encoding="utf-8") as f:
                            return json.load(f)
                    except FileNotFoundError:
                        return {}

                def _write_results(self, all_results: dict) -> None:
                    """Persist ``all_results`` without ever leaving a half-written results.json."""
                    results_path = self._results_path()
                    tmp_path = f"{results_path}.tmp"
                    # Serialize to a sibling temp file first: if dumping fails part-way, the
                    # previous results.json is still intact.
                    with open(tmp_path, "w", encoding="utf-8") as f:
                        json.dump(all_results, f, indent=2)
                    os.replace(tmp_path, results_path)

                def fetch_existing_models(self) -> None:
                    """Rebuild the in-memory submission index from results.json."""
                    self.submisssion_id_to_file.clear()
                    self.submission_ids.clear()

                    for model_id in self._load_results():
                        self.submission_ids.add(model_id)
                        hyp_path = os.path.join(self.local_leaderboard, f"{model_id}_hyp.json")
                        self.submisssion_id_to_file[model_id] = hyp_path

                def prepare_model_for_submission(self, file, metadata, task, datasets):
                    """Score a hypothesis file against each dataset and store the metrics.

                    Args:
                        file: Hypothesis file, passed straight to ``meeteval.io.load``.
                        metadata: Mapping that must contain ``"submitted_by"`` and ``"model_id"``.
                        task: Name of the sub-directory of ``references`` holding the references.
                        datasets: Iterable of dataset names; each one is read from
                            ``references/<task>/<dataset>.json``.

                    Raises:
                        KeyError: If ``metadata`` lacks ``"submitted_by"`` or ``"model_id"``.
                    """
                    submitted_by = metadata["submitted_by"]
                    model_id = metadata["model_id"]

                    def normalize(seg):
                        # Same text normalization is applied to references and hypotheses.
                        return {**seg, "words": self.text_normalizer(seg["words"])}

                    # Run WER eval
                    results = {}
                    hyp_seglst = meeteval.io.load(file)

                    for dataset in datasets:
                        ref_path = os.path.join("references", task, f"{dataset}.json")
                        ref_seglst = meeteval.io.load(ref_path)
                        sessions = ref_seglst.unique("session_id")
                        # Only hypotheses for sessions present in this dataset's reference are scored.
                        local_hyps = hyp_seglst.filter(lambda seg: seg["session_id"] in sessions)
                        ref_seglst = ref_seglst.map(normalize)
                        local_hyps = local_hyps.map(normalize)
                        per_session_wers = calc_wer(
                            tcp_hyp_seglst=local_hyps,
                            ref_seglst=ref_seglst,
                            collar=5,
                            metrics_list=["tcp_wer"],
                        )
                        results[dataset] = aggregate_wer_metrics(per_session_wers, ["tcp_wer"])

                    # Update results file (an existing entry for the same model_id is overwritten).
                    all_results = self._load_results()
                    all_results[model_id] = {
                        "submitted_by": submitted_by,
                        "results": results,
                    }
                    self._write_results(all_results)

                def update_leaderboard(self) -> None:
                    """Refresh the in-memory submission index from disk."""
                    self.fetch_existing_models()

                def get_leaderboard(self) -> pd.DataFrame:
                    """Return one row per model with its tcp_wer for every evaluated dataset.

                    When there are no stored results (file missing or empty mapping), a frame
                    with the single placeholder column ``"No submissions yet"`` is returned.
                    """
                    results = self._load_results()
                    if not results:
                        return pd.DataFrame(columns=["No submissions yet"])

                    rows = []
                    for model_id, content in results.items():
                        row = {"Model ID": model_id, "Submitted by": content["submitted_by"]}
                        for dataset, metrics in content["results"].items():
                            # Datasets without a tcp_wer entry show up as missing values.
                            row[dataset] = metrics.get("tcp_wer")
                        rows.append(row)

                    return pd.DataFrame(rows)
         | 
