Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	
		Tom Aarsen
		
	committed on
		
		
					Commit 
							
							·
						
						046fe24
	
1
								Parent(s):
							
							19c9748
								
Load "full" data to also get filenames
Browse files
 - refresh.py +4 -5
 - utils/model_size.py +1 -1
 
    	
        refresh.py
    CHANGED
    
    | 
         @@ -174,7 +174,8 @@ def filter_metric_fetched(name: str, metric: str, expected_metrics) -> bool: 
     | 
|
| 174 | 
         | 
| 175 | 
         | 
| 176 | 
         
             
            def get_dim_seq_size(model):
         
     | 
| 177 | 
         
            -
                 
     | 
| 
         | 
|
| 178 | 
         
             
                dim, seq = "", ""
         
     | 
| 179 | 
         
             
                for filename in filenames:
         
     | 
| 180 | 
         
             
                    if re.match("\d+_Pooling/config.json", filename):
         
     | 
| 
         @@ -302,7 +303,7 @@ def get_mteb_data( 
     | 
|
| 302 | 
         
             
                    external_model_results = json.load(f)
         
     | 
| 303 | 
         | 
| 304 | 
         
             
                api = API
         
     | 
| 305 | 
         
            -
                models = list(api.list_models(filter="mteb"))
         
     | 
| 306 | 
         
             
                # Legacy names changes; Also fetch the old results & merge later
         
     | 
| 307 | 
         
             
                if "MLSUMClusteringP2P (fr)" in datasets:
         
     | 
| 308 | 
         
             
                    datasets.append("MLSUMClusteringP2P")
         
     | 
| 
         @@ -429,9 +430,7 @@ def get_mteb_data( 
     | 
|
| 429 | 
         
             
                        if add_emb_dim:
         
     | 
| 430 | 
         
             
                            # The except clause triggers on gated repos, we can use external metadata for those
         
     | 
| 431 | 
         
             
                            try:
         
     | 
| 432 | 
         
            -
                                MODEL_INFOS[model.modelId]["dim_seq_size"] = list(
         
     | 
| 433 | 
         
            -
                                    get_dim_seq_size(model)
         
     | 
| 434 | 
         
            -
                                )
         
     | 
| 435 | 
         
             
                            except:
         
     | 
| 436 | 
         
             
                                name_without_org = model.modelId.split("/")[-1]
         
     | 
| 437 | 
         
             
                                # EXTERNAL_MODEL_TO_SIZE[name_without_org] refers to millions of parameters, so for memory usage
         
     | 
| 
         | 
|
| 174 | 
         | 
| 175 | 
         | 
| 176 | 
         
             
            def get_dim_seq_size(model):
         
     | 
| 177 | 
         
            +
                siblings = model.siblings or []
         
     | 
| 178 | 
         
            +
                filenames = [sib.rfilename for sib in siblings]
         
     | 
| 179 | 
         
             
                dim, seq = "", ""
         
     | 
| 180 | 
         
             
                for filename in filenames:
         
     | 
| 181 | 
         
             
                    if re.match("\d+_Pooling/config.json", filename):
         
     | 
| 
         | 
|
| 303 | 
         
             
                    external_model_results = json.load(f)
         
     | 
| 304 | 
         | 
| 305 | 
         
             
                api = API
         
     | 
| 306 | 
         
            +
                models = list(api.list_models(filter="mteb", full=True))
         
     | 
| 307 | 
         
             
                # Legacy names changes; Also fetch the old results & merge later
         
     | 
| 308 | 
         
             
                if "MLSUMClusteringP2P (fr)" in datasets:
         
     | 
| 309 | 
         
             
                    datasets.append("MLSUMClusteringP2P")
         
     | 
| 
         | 
|
| 430 | 
         
             
                        if add_emb_dim:
         
     | 
| 431 | 
         
             
                            # The except clause triggers on gated repos, we can use external metadata for those
         
     | 
| 432 | 
         
             
                            try:
         
     | 
| 433 | 
         
            +
                                MODEL_INFOS[model.modelId]["dim_seq_size"] = list(get_dim_seq_size(model))
         
     | 
| 
         | 
|
| 
         | 
|
| 434 | 
         
             
                            except:
         
     | 
| 435 | 
         
             
                                name_without_org = model.modelId.split("/")[-1]
         
     | 
| 436 | 
         
             
                                # EXTERNAL_MODEL_TO_SIZE[name_without_org] refers to millions of parameters, so for memory usage
         
     | 
    	
        utils/model_size.py
    CHANGED
    
    | 
         @@ -15,7 +15,7 @@ def get_model_parameters_memory(model_info: ModelInfo): 
     | 
|
| 15 | 
         
             
                try:
         
     | 
| 16 | 
         
             
                    safetensors = get_safetensors_metadata(model_info.id)
         
     | 
| 17 | 
         
             
                except Exception as e:
         
     | 
| 18 | 
         
            -
                     
     | 
| 19 | 
         
             
                else:
         
     | 
| 20 | 
         
             
                    num_parameters = sum(safetensors.parameter_count.values())
         
     | 
| 21 | 
         
             
                    return round(num_parameters / 1e6), round(num_parameters * 4 / 1024**3, 2)
         
     | 
| 
         | 
|
| 15 | 
         
             
                try:
         
     | 
| 16 | 
         
             
                    safetensors = get_safetensors_metadata(model_info.id)
         
     | 
| 17 | 
         
             
                except Exception as e:
         
     | 
| 18 | 
         
            +
                    pass
         
     | 
| 19 | 
         
             
                else:
         
     | 
| 20 | 
         
             
                    num_parameters = sum(safetensors.parameter_count.values())
         
     | 
| 21 | 
         
             
                    return round(num_parameters / 1e6), round(num_parameters * 4 / 1024**3, 2)
         
     |