Clémentine committed · Commit 943f952
Parent(s): 314f91a

update read
Files changed:
- README.md +24 -3
- src/display/about.py +5 -3
- src/leaderboard/read_evals.py +5 -9
README.md CHANGED

````diff
@@ -1,6 +1,6 @@
 ---
-title: 
-emoji: 
+title: Demo Leaderboard
+emoji: 🥇
 colorFrom: green
 colorTo: indigo
 sdk: gradio
@@ -12,4 +12,25 @@ license: apache-2.0
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
-Most of the variables to change for a default leaderboard are in env (replace the path for your leaderboard) and src/display/about.
+Most of the variables to change for a default leaderboard are in env (replace the path for your leaderboard) and src/display/about.
+
+Results files should have the following format:
+```
+{
+    "config": {
+        "model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
+        "model_name": "path of the model on the hub: org/model",
+        "model_sha": "revision on the hub",
+    },
+    "results": {
+        "task_name": {
+            "metric_name": score,
+        },
+        "task_name2": {
+            "metric_name": score,
+        }
+    }
+}
+```
+
+Request files are created automatically by this tool.
````
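For concreteness, a results file following the format documented above could be produced like this; this is a minimal sketch, and the org/model name, task keys, scores, and output filename are placeholders rather than values from this repository:

```python
import json

# Hypothetical results file matching the format documented in README.md above.
# "demo-org/demo-model", the task keys, and the scores are placeholders.
results = {
    "config": {
        "model_dtype": "torch.float16",  # or torch.bfloat16 or 8bit or 4bit
        "model_name": "demo-org/demo-model",
        "model_sha": "main",
    },
    "results": {
        "task_name1": {"metric_name": 0.61},
        "task_name2": {"metric_name": 0.74},
    },
}

with open("results_demo-org_demo-model.json", "w") as fp:
    json.dump(results, fp, indent=4)
```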
src/display/about.py CHANGED

```diff
@@ -10,15 +10,17 @@ class Task:
 
 # Init: to update with your specific keys
 class Tasks(Enum):
-    
-    
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    task0 = Task("task_name1", "metric_name", "First task")
+    task1 = Task("task_name2", "metric_name", "Second task")
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">
+TITLE = """<h1 align="center" id="space-title">Demo leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
+Intro text
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
```
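To see how these new enum entries tie result files to leaderboard columns, here is a self-contained sketch; the `Task` dataclass is reconstructed from the comment in the hunk, with `benchmark` and `metric` matching the attribute names used in src/leaderboard/read_evals.py below, and `col_name` an assumed name for the display-name field:

```python
from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # task_key in the results json (read as task.benchmark in read_evals.py)
    metric: str     # metric_key in the results json (read as task.metric in read_evals.py)
    col_name: str   # name to display in the leaderboard (field name assumed here)

class Tasks(Enum):
    task0 = Task("task_name1", "metric_name", "First task")
    task1 = Task("task_name2", "metric_name", "Second task")

# Each entry maps a key in a results file's data["results"] to a leaderboard column.
for task in Tasks:
    print(task.value.benchmark, "->", task.value.col_name)
```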
src/leaderboard/read_evals.py CHANGED

```diff
@@ -5,8 +5,6 @@ import os
 from dataclasses import dataclass
 
 import dateutil
-from datetime import datetime
-from transformers import AutoConfig
 import numpy as np
 
 from src.display.formatting import make_clickable_model
@@ -16,7 +14,6 @@ from src.submission.check_validity import is_model_on_hub
 
 @dataclass
 class EvalResult:
-    # Also see src.display.utils.AutoEvalColumn for what will be displayed.
     eval_name: str # org_model_precision (uid)
     full_model: str # org/model (path on hub)
     org: str 
@@ -26,7 +23,7 @@ class EvalResult:
     precision: Precision = Precision.Unknown
     model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
     weight_type: WeightType = WeightType.Original # Original or Adapter
-    architecture: str = "Unknown" 
+    architecture: str = "Unknown" 
     license: str = "?"
     likes: int = 0
     num_params: int = 0
@@ -39,8 +36,7 @@ class EvalResult:
         with open(json_filepath) as fp:
             data = json.load(fp)
 
-        
-        config = data.get("config", data.get("config_general", None))
+        config = data.get("config")
 
         # Precision
         precision = Precision.from_str(config.get("model_dtype"))
@@ -59,7 +55,7 @@ class EvalResult:
             result_key = f"{org}_{model}_{precision.value.name}"
         full_model = "/".join(org_and_model)
 
-        still_on_hub, 
+        still_on_hub, _, model_config = is_model_on_hub(
             full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
         )
         architecture = "?"
@@ -73,8 +69,8 @@ class EvalResult:
         for task in Tasks:
             task = task.value
 
-            # We average all scores of a given metric
-            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark 
+            # We average all scores of a given metric (not all metrics are present in all files)
+            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
 
```
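The behavioral change in the last hunk is that a results entry now has to match `task.benchmark` exactly (`task.benchmark == k`) to be counted. Below is a minimal sketch of that extraction loop, using a stripped-down `Task` and a placeholder `data` dict in place of a parsed results file; the final print with the mean is illustrative, not taken from this diff:

```python
from dataclasses import dataclass
from enum import Enum

import numpy as np

@dataclass
class Task:
    benchmark: str
    metric: str

class Tasks(Enum):
    task0 = Task("task_name1", "metric_name")
    task1 = Task("task_name2", "metric_name")

# Placeholder for the parsed results file (what json.load would return).
data = {
    "results": {
        "task_name1": {"metric_name": 0.61},
        "task_name2": {"other_metric": 0.5},  # lacks "metric_name", so it is skipped
    }
}

for task in Tasks:
    task = task.value
    # Keep only entries whose key equals task.benchmark, as in the new line above.
    accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
    if accs.size == 0 or any(acc is None for acc in accs):
        continue
    print(task.benchmark, np.mean(accs))
```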