saattrupdan committed on
Commit
a27fbcc
·
1 Parent(s): 376f461

fix: Fetch data from tar.gz file

Browse files
Files changed (1) hide show
  1. app.py +22 -6
app.py CHANGED
@@ -1,9 +1,11 @@
1
  """Script to produce radial plots."""
2
 
3
  from functools import partial
 
4
  import plotly.graph_objects as go
5
  import json
6
  import numpy as np
 
7
  from collections import defaultdict
8
  import pandas as pd
9
  from pydantic import BaseModel
@@ -782,19 +784,33 @@ def fetch_results() -> dict[Language, pd.DataFrame]:
782
  Returns:
783
  A dictionary of languages -> results-dataframes, whose indices are the
784
  models and columns are the tasks.
 
 
 
 
 
 
785
  """
786
  logger.info("Fetching results from EuroEval benchmark...")
787
 
 
788
  response = requests.get(
789
  "https://raw.githubusercontent.com/EuroEval/leaderboards/refs/heads/main"
790
- "/results/results.jsonl"
791
  )
792
  response.raise_for_status()
793
- records = [
794
- json.loads(dct_str)
795
- for dct_str in response.text.split("\n")
796
- if dct_str.strip("\n")
797
- ]
 
 
 
 
 
 
 
798
 
799
  # Build a dictionary of languages -> results-dataframes, whose indices are the
800
  # models and columns are the tasks.
 
1
  """Script to produce radial plots."""
2
 
3
  from functools import partial
4
+ import io
5
  import plotly.graph_objects as go
6
  import json
7
  import numpy as np
8
+ import tarfile
9
  from collections import defaultdict
10
  import pandas as pd
11
  from pydantic import BaseModel
 
784
  Returns:
785
  A dictionary of languages -> results-dataframes, whose indices are the
786
  models and columns are the tasks.
787
+
788
+ Raises:
789
+ FileNotFoundError:
790
+ If the results.jsonl file is not found in the tar.gz file.
791
+ HTTPError:
792
+ If there is an error fetching the results from GitHub.
793
  """
794
  logger.info("Fetching results from EuroEval benchmark...")
795
 
796
+ # Get the tar.gz file containing the results
797
  response = requests.get(
798
  "https://raw.githubusercontent.com/EuroEval/leaderboards/refs/heads/main"
799
+ "/results.tar.gz"
800
  )
801
  response.raise_for_status()
802
+ compressed_file = io.BytesIO(response.content)
803
+
804
+ # Unpack the tar.gz file in memory and read the JSONL file
805
+ with tarfile.open(fileobj=compressed_file, mode="r:gz") as tar:
806
+ results_file = tar.extractfile(member="results/results.jsonl")
807
+ if results_file is None:
808
+ raise FileNotFoundError(
809
+ "results/results.jsonl not found in the tar.gz file."
810
+ )
811
+ result_lines = results_file.read().decode(encoding="utf-8").splitlines()
812
+
813
+ records = [json.loads(dct_str) for dct_str in result_lines if dct_str.strip("\n")]
814
 
815
  # Build a dictionary of languages -> results-dataframes, whose indices are the
816
  # models and columns are the tasks.