Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	
		burtenshaw
		
	committed on
		
		
					Commit 
							
							·
						
						1c7c01e
	
1
								Parent(s):
							
							b5eec3d
								
update app to use lighteval format
Browse files
    	
        app.py
    CHANGED
    
    | @@ -9,7 +9,7 @@ from datasets import load_dataset | |
| 9 | 
             
            abs_path = Path(__file__).parent
         | 
| 10 | 
             
            submissions = json.load(open(abs_path / "submissions.json"))
         | 
| 11 |  | 
| 12 | 
            -
            TASKS = [" | 
| 13 | 
             
            TYPES = [
         | 
| 14 | 
             
                "markdown",
         | 
| 15 | 
             
                "markdown",
         | 
| @@ -21,14 +21,45 @@ COLUMNS = ["User", "Model Name", "MMLU", "Average ⬆️", "Results"] | |
| 21 | 
             
            WIDTHS = ["25%", "25%", "15%", "15%", "10%"]
         | 
| 22 |  | 
| 23 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 24 | 
             
            def load_submissions():
         | 
| 25 | 
             
                leaderboard = []
         | 
| 26 | 
            -
             | 
| 27 | 
             
                for submission in submissions["submissions"]:
         | 
| 28 | 
             
                    ds = load_dataset(submission["results-dataset"], "results")
         | 
| 29 | 
            -
                    ds = ds.filter(lambda x: x["task"] in TASKS)
         | 
| 30 |  | 
| 31 | 
            -
                     | 
|  | |
|  | |
|  | |
| 32 |  | 
| 33 | 
             
                    leaderboard_row = {}
         | 
| 34 |  | 
| @@ -40,11 +71,12 @@ def load_submissions(): | |
| 40 | 
             
                        f"[{submission['model_name']}](https://huggingface.co/{submission['model_name']})"
         | 
| 41 | 
             
                    )
         | 
| 42 |  | 
| 43 | 
            -
                    for result in  | 
| 44 | 
            -
                        leaderboard_row[ | 
| 45 | 
            -
                        all_accuracy.append(result["accuracy"])
         | 
| 46 |  | 
| 47 | 
            -
                    leaderboard_row["Average ⬆️"] = sum( | 
|  | |
|  | |
| 48 |  | 
| 49 | 
             
                    leaderboard_row["results-dataset"] = (
         | 
| 50 | 
             
                        f"[🔗](https://huggingface.co/datasets/{submission['results-dataset']})"
         | 
|  | |
| 9 | 
             
            abs_path = Path(__file__).parent
         | 
| 10 | 
             
            submissions = json.load(open(abs_path / "submissions.json"))
         | 
| 11 |  | 
| 12 | 
            +
            TASKS = [("gsm8k", "lighteval|gsm8k|0", "extractive_match")]
         | 
| 13 | 
             
            TYPES = [
         | 
| 14 | 
             
                "markdown",
         | 
| 15 | 
             
                "markdown",
         | 
|  | |
| 21 | 
             
            WIDTHS = ["25%", "25%", "15%", "15%", "10%"]
         | 
| 22 |  | 
| 23 |  | 
| 24 | 
            +
            def load_results(dataset):
         | 
| 25 | 
            +
                results = []
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                try:
         | 
| 28 | 
            +
                    output = dataset["latest"]["results"]
         | 
| 29 | 
            +
                    output = output[-1]
         | 
| 30 | 
            +
                except KeyError as e:
         | 
| 31 | 
            +
                    raise ValueError("Cannot find 'latest' key in the dataset")
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                try:
         | 
| 34 | 
            +
                    output = json.loads(output)
         | 
| 35 | 
            +
                except ValueError as e:
         | 
| 36 | 
            +
                    raise ValueError("Cannot parse the output as JSON")
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                for name, task, metric in TASKS:
         | 
| 39 | 
            +
                    try:
         | 
| 40 | 
            +
                        output = output[task]
         | 
| 41 | 
            +
                    except KeyError as e:
         | 
| 42 | 
            +
                        raise ValueError(f"Cannot find '{task}' key in the dataset")
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                    try:
         | 
| 45 | 
            +
                        output = (name, output[metric])
         | 
| 46 | 
            +
                    except KeyError as e:
         | 
| 47 | 
            +
                        raise ValueError("Cannot find 'extractive_match' key in the dataset")
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                    results.append(output)
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                return results
         | 
| 52 | 
            +
             | 
| 53 | 
            +
             | 
| 54 | 
             
            def load_submissions():
         | 
| 55 | 
             
                leaderboard = []
         | 
|  | |
| 56 | 
             
                for submission in submissions["submissions"]:
         | 
| 57 | 
             
                    ds = load_dataset(submission["results-dataset"], "results")
         | 
|  | |
| 58 |  | 
| 59 | 
            +
                    try:
         | 
| 60 | 
            +
                        results = load_results(ds)
         | 
| 61 | 
            +
                    except ValueError as e:
         | 
| 62 | 
            +
                        raise ValueError(f"Cannot load results for {ds['results-dataset']}") from e
         | 
| 63 |  | 
| 64 | 
             
                    leaderboard_row = {}
         | 
| 65 |  | 
|  | |
| 71 | 
             
                        f"[{submission['model_name']}](https://huggingface.co/{submission['model_name']})"
         | 
| 72 | 
             
                    )
         | 
| 73 |  | 
| 74 | 
            +
                    for name, result in results:
         | 
| 75 | 
            +
                        leaderboard_row[name] = result
         | 
|  | |
| 76 |  | 
| 77 | 
            +
                    leaderboard_row["Average ⬆️"] = sum(result for _, result in results) / len(
         | 
| 78 | 
            +
                        results
         | 
| 79 | 
            +
                    )
         | 
| 80 |  | 
| 81 | 
             
                    leaderboard_row["results-dataset"] = (
         | 
| 82 | 
             
                        f"[🔗](https://huggingface.co/datasets/{submission['results-dataset']})"
         | 
    	
        docs.md
    CHANGED
    
    | @@ -65,7 +65,14 @@ Open a pull request on the [leaderboard space](https://huggingface.co/spaces/smo | |
| 65 | 
             
            ```json
         | 
| 66 | 
             
            {
         | 
| 67 | 
             
                "submissions": [
         | 
| 68 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 69 | 
             
                    ... # existing submissions
         | 
| 70 |  | 
| 71 | 
             
                    {
         | 
|  | |
| 65 | 
             
            ```json
         | 
| 66 | 
             
            {
         | 
| 67 | 
             
                "submissions": [
         | 
| 68 | 
            +
                    {
         | 
| 69 | 
            +
                        "username": "HuggingFaceTB",
         | 
| 70 | 
            +
                        "model_name": "SmolLM3-3B",
         | 
| 71 | 
            +
                        "chapter": "1",
         | 
| 72 | 
            +
                        "submission_date": "2025-09-02",
         | 
| 73 | 
            +
                        "results-dataset": "smol-course/details_HuggingFaceTB__SmolLM3-3B_private"
         | 
| 74 | 
            +
                    },
         | 
| 75 | 
            +
                    
         | 
| 76 | 
             
                    ... # existing submissions
         | 
| 77 |  | 
| 78 | 
             
                    {
         | 
