saattrupdan committed on
Commit
376f461
·
1 Parent(s): 60d6a88

fix: Separate zero-shot performance from few-shot

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -786,7 +786,8 @@ def fetch_results() -> dict[Language, pd.DataFrame]:
786
  logger.info("Fetching results from EuroEval benchmark...")
787
 
788
  response = requests.get(
789
- "https://raw.githubusercontent.com/EuroEval/leaderboards/refs/heads/main/results/results.jsonl"
 
790
  )
791
  response.raise_for_status()
792
  records = [
@@ -805,6 +806,8 @@ def fetch_results() -> dict[Language, pd.DataFrame]:
805
  data_dict = defaultdict(dict)
806
  for record in records:
807
  model_name = record["model"]
 
 
808
  raw_results = record["results"]["raw"]
809
  if isinstance(raw_results, dict) and "test" in raw_results:
810
  raw_results = raw_results.get("test", raw_results)
 
786
  logger.info("Fetching results from EuroEval benchmark...")
787
 
788
  response = requests.get(
789
+ "https://raw.githubusercontent.com/EuroEval/leaderboards/refs/heads/main"
790
+ "/results/results.jsonl"
791
  )
792
  response.raise_for_status()
793
  records = [
 
806
  data_dict = defaultdict(dict)
807
  for record in records:
808
  model_name = record["model"]
809
+ if not record["few_shot"]:
810
+ model_name += " (zero-shot)"
811
  raw_results = record["results"]["raw"]
812
  if isinstance(raw_results, dict) and "test" in raw_results:
813
  raw_results = raw_results.get("test", raw_results)