Alexis Gobé committed
Commit 1cf23a9 · unverified · 2 parents: f2b0392 0a98ec9

Merge pull request #18 from leaderboard-modeles-IA-francais/remove-precision

frontend/src/App.jsx CHANGED
@@ -6,6 +6,7 @@ import {
6
  useSearchParams,
7
  useLocation,
8
  } from "react-router-dom";
 
9
  import { Box, CssBaseline } from "@mui/material";
10
  import Navigation from "./components/Navigation/Navigation";
11
  import LeaderboardPage from "./pages/LeaderboardPage/LeaderboardPage";
@@ -69,6 +70,11 @@ function UrlHandler() {
69
 
70
  function App() {
71
 
72
  return (
73
  <div
74
  className="App"
@@ -113,7 +119,7 @@ function App() {
113
  <Route path="/vote" element={<VoteModelPage />} /> */}
114
  </Routes>
115
  </Box>
116
- <Footer />
117
  </Box>
118
  </LeaderboardProvider>
119
  </Router>
 
6
  useSearchParams,
7
  useLocation,
8
  } from "react-router-dom";
9
+ import { resolveLocalizedString, useResolveLocalizedString } from "i18n";
10
  import { Box, CssBaseline } from "@mui/material";
11
  import Navigation from "./components/Navigation/Navigation";
12
  import LeaderboardPage from "./pages/LeaderboardPage/LeaderboardPage";
 
70
 
71
  function App() {
72
 
73
+ const disclaimer = {
74
+ "fr": "Ce leaderboard compare les modèles de langage adaptés à la langue française, sur des jeux de données en français, adaptés aux spécificités culturelles de la francophonie. C'est d'abord un projet de recherche collaboratif, et nous espérons recevoir de nombreuses contributions pour l'améliorer au fil du temps ! Le leaderboard n'est que dans sa toute première version, et sera amené à évoluer régulièrement, avec de nouveaux jeux de données, de nouvelles métriques, et, nous l'espérons, beaucoup de nouveaux modèles ouverts soumis par la communauté ! Dans sa version initiale, nous avons couvert un panel de modèles ouverts, entraînés sur du français, de différentes tailles et origines. Note : les données d'évaluation ont été pour l'instant gardées confidentielles, pour préserver l'intégrité et la validité des résultats, et éviter les manipulations du classement.",
75
+ "en": "This leaderboard compares language models adapted to the French language, on French datasets, adapted to the cultural specificities of the French-speaking world. It is primarily a collaborative research project, and we hope to receive many contributions to improve it over time! The leaderboard is only in its very first version, and will evolve regularly, with new datasets, new metrics, and, we hope, many new open models submitted by the community! In its initial version, we covered a panel of open models, trained on French, of various sizes and origins. Note: The evaluation data has been kept confidential for the time being, to preserve the integrity and validity of the results, and avoid manipulation of the ranking."
76
+ }
77
+
78
  return (
79
  <div
80
  className="App"
 
119
  <Route path="/vote" element={<VoteModelPage />} /> */}
120
  </Routes>
121
  </Box>
122
+ <Footer disclaimer={disclaimer}/>
123
  </Box>
124
  </LeaderboardProvider>
125
  </Router>
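The disclaimer added to App.jsx is a plain { fr, en } record; App never resolves it itself, it just passes the record down via <Footer disclaimer={disclaimer}/>, and Footer picks the translation at render time. A minimal sketch of that consuming side, assuming the "i18n" module exposes the useResolveLocalizedString hook imported above (the Disclaimer component here is hypothetical, for illustration only):

  import { useResolveLocalizedString } from "i18n";

  // Hypothetical consumer of an { en, fr } record such as the disclaimer above.
  // resolveLocalizedString is assumed to return the string for the current
  // language, falling back to English when a translation is missing.
  function Disclaimer({ disclaimer }) {
    const { resolveLocalizedString } = useResolveLocalizedString();
    return <p className="disclaimer">{resolveLocalizedString(disclaimer)}</p>;
  }

  export default Disclaimer;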
frontend/src/components/Footer/Footer.tsx CHANGED
@@ -3,9 +3,11 @@ import {CallOut} from "@codegouvfr/react-dsfr/CallOut";
3
  import {Highlight} from "@codegouvfr/react-dsfr/Highlight";
4
  import { Alert } from "@codegouvfr/react-dsfr/Alert";
5
  // import { Box, Typography, Link } from "@mui/material";
 
6
 
7
- const footer = () => {
8
 
 
9
  // const callout = <Alert
10
  // severity="warning"
11
  // description="
@@ -20,9 +22,7 @@ const footer = () => {
20
  classes={{
21
  root: 'fr-highlight--orange-terre-battue'
22
  }}>
23
- Ce leaderboard compare les modèles de langage adaptés à la langue française, sur des jeux de données en français, adaptés aux spécificités culturelles de la francophonie. C'est d'abord un projet de recherche collaboratif, et nous espérons recevoir de nombreuses contributions pour l'améliorer au fil du temps !
24
- Le leaderboard n'est que dans sa toute première version, et sera amené à évoluer régulièrement, avec de nouveaux jeux de données, de nouvelles métriques, et, nous l'espérons, beaucoup de nouveaux modèles ouverts soumis par la communauté ! Dans sa version initiale, nous avons couvert un panel de modèles ouverts, entraînés sur du français, de différentes tailles et origines.
25
- Note : les données d'évaluation ont été pour l'instant gardées confidentielles, pour préserver l'intégrité et la validité des résultats, et éviter les manipulations du classement.
26
  </Highlight>
27
 
28
  return <Footer
 
3
  import {Highlight} from "@codegouvfr/react-dsfr/Highlight";
4
  import { Alert } from "@codegouvfr/react-dsfr/Alert";
5
  // import { Box, Typography, Link } from "@mui/material";
6
+ import { resolveLocalizedString, useResolveLocalizedString } from "i18n";
7
 
8
+ const footer = ({disclaimer}) => {
9
 
10
+ const {resolveLocalizedString} = useResolveLocalizedString();
11
  // const callout = <Alert
12
  // severity="warning"
13
  // description="
 
22
  classes={{
23
  root: 'fr-highlight--orange-terre-battue'
24
  }}>
25
+ {resolveLocalizedString(disclaimer)}
 
 
26
  </Highlight>
27
 
28
  return <Footer
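Footer.tsx now receives the localized record as a prop and resolves it with useResolveLocalizedString. The "i18n" module itself is not part of this diff; a minimal stand-in that matches how it is called here — a sketch under that assumption, not the project's actual implementation — could look like:

  // i18n.js — hypothetical stand-in for the module imported throughout this PR.
  // The real module presumably tracks the active language in app state and
  // re-renders consumers when it changes; this sketch hard-codes it.
  const currentLanguage = "fr"; // assumption for illustration

  export function resolveLocalizedString(localized) {
    // Accept either a plain string or an { en, fr } record.
    if (typeof localized === "string") return localized;
    return localized[currentLanguage] ?? localized.en;
  }

  export function useResolveLocalizedString() {
    // Hook form used inside components (Footer, ColumnSelector, DisplayOptions).
    return { resolveLocalizedString };
  }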
frontend/src/pages/AddModelPage/components/ModelSubmissionForm/ModelSubmissionForm.jsx CHANGED
@@ -24,7 +24,7 @@ import InfoIconWithTooltip from "../../../../components/shared/InfoIconWithToolt
24
  import { MODEL_TYPES } from "../../../LeaderboardPage/components/Leaderboard/constants/modelTypes";
25
  import { SUBMISSION_PRECISIONS } from "../../../LeaderboardPage/components/Leaderboard/constants/defaults";
26
  import AuthContainer from "../../../../components/shared/AuthContainer";
27
- import { useResolveLocalizedString } from "i18n";
28
 
29
  const WEIGHT_TYPES = [
30
  { value: "Original", label: "Original" },
@@ -33,90 +33,118 @@ const WEIGHT_TYPES = [
33
  ];
34
 
35
  const HELP_TEXTS = {
36
- modelName: (
37
  <Box sx={{ p: 1 }}>
38
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
39
- Model Name on Hugging Face Hub
40
  </Typography>
41
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
42
- Your model must be public and loadable with AutoClasses without
43
- trust_remote_code. The model should be in Safetensors format for better
44
- safety and loading performance. Example: mistralai/Mistral-7B-v0.1
 
 
 
45
  </Typography>
46
  </Box>
47
  ),
48
- revision: (
49
  <Box sx={{ p: 1 }}>
50
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
51
- Model Revision
52
  </Typography>
53
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
54
- Git branch, tag or commit hash. The evaluation will be strictly tied to
55
- this specific commit to ensure consistency. Make sure this version is
56
- stable and contains all necessary files.
 
 
 
57
  </Typography>
58
  </Box>
59
  ),
60
- modelType: (
61
  <Box sx={{ p: 1 }}>
62
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
63
- Model Category
64
  </Typography>
65
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
66
- 🟢 Pretrained: Base models trained on text using masked modeling 🟩
67
- Continuously Pretrained: Extended training on additional corpus 🔶
68
- Fine-tuned: Domain-specific optimization 💬 Chat: Models using RLHF,
69
- DPO, or IFT for conversation 🤝 Merge: Combined weights without
70
- additional training 🌸 Multimodal: Handles multiple input types
 
71
  </Typography>
72
  </Box>
73
  ),
74
- baseModel: (
75
  <Box sx={{ p: 1 }}>
76
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
77
- Base Model Reference
78
  </Typography>
79
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
80
- Required for delta weights or adapters. This information is used to
81
- identify the original model and calculate the total parameter count by
82
- combining base model and adapter/delta parameters.
 
 
 
83
  </Typography>
84
  </Box>
85
  ),
86
- precision: (
87
  <Box sx={{ p: 1 }}>
88
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
89
- Model Precision
 
 
90
  </Typography>
91
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
92
- Size limits vary by precision: • FP16/BF16: up to 100B parameters •
93
- 8-bit: up to 280B parameters (2x) • 4-bit: up to 560B parameters (4x)
94
- Choose carefully as incorrect precision can cause evaluation errors.
 
 
 
95
  </Typography>
96
  </Box>
97
  ),
98
- weightsType: (
99
  <Box sx={{ p: 1 }}>
100
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
101
- Weights Format
 
 
 
102
  </Typography>
103
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
104
- Original: Complete model weights in safetensors format Delta: Weight
105
- differences from base model (requires base model for size calculation)
106
- Adapter: Lightweight fine-tuning layers (requires base model for size
107
- calculation)
 
 
108
  </Typography>
109
  </Box>
110
  ),
111
- chatTemplate: (
112
  <Box sx={{ p: 1 }}>
113
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
114
- Chat Template Support
 
 
 
 
 
115
  </Typography>
116
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
117
- Activates automatically for chat models. It uses the standardized Hugging
118
- Face chat template for consistent prompt formatting during evaluation.
119
- Required for models using RLHF, DPO, or instruction fine-tuning.
 
 
 
120
  </Typography>
121
  </Box>
122
  ),
@@ -258,8 +286,8 @@ function ModelSubmissionForm({ user, isAuthenticated }) {
258
  modelType: "fine-tuned",
259
  isChatModel: false,
260
  useChatTemplate: false,
261
- precision: "float16",
262
- weightsType: "Original",
263
  baseModel: "",
264
  });
265
  const [error, setError] = useState(null);
@@ -521,7 +549,7 @@ function ModelSubmissionForm({ user, isAuthenticated }) {
521
  <Grid item xs={12}>
522
  <Stack direction="row" spacing={1} alignItems="center">
523
  <Typography variant="h6">{resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_INFO.TITLE)}</Typography>
524
- <InfoIconWithTooltip tooltip={HELP_TEXTS.modelName} />
525
  </Stack>
526
  </Grid>
527
 
@@ -537,7 +565,7 @@ function ModelSubmissionForm({ user, isAuthenticated }) {
537
  helperText="Example: meta-llama/Llama-3.2-1B"
538
  InputProps={{
539
  endAdornment: (
540
- <InfoIconWithTooltip tooltip={HELP_TEXTS.modelName} />
541
  ),
542
  }}
543
  />
@@ -553,7 +581,7 @@ function ModelSubmissionForm({ user, isAuthenticated }) {
553
  helperText={resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_INFO.REVISION.HELPER_TEXT)}
554
  InputProps={{
555
  endAdornment: (
556
- <InfoIconWithTooltip tooltip={HELP_TEXTS.revision} />
557
  ),
558
  }}
559
  />
@@ -576,7 +604,7 @@ function ModelSubmissionForm({ user, isAuthenticated }) {
576
  label={resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.TYPE.LABEL)}
577
  endAdornment={
578
  <InfoIconWithTooltip
579
- tooltip={HELP_TEXTS.modelType}
580
  sx={{ mr: 2 }}
581
  />
582
  }
@@ -590,7 +618,7 @@ function ModelSubmissionForm({ user, isAuthenticated }) {
590
  </FormControl>
591
  </Grid>
592
 
593
- <Grid item xs={12} sm={6}>
594
  <Stack
595
  direction="row"
596
  spacing={2}
@@ -607,11 +635,11 @@ function ModelSubmissionForm({ user, isAuthenticated }) {
607
  }
608
  label={resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.TEMPLATE.LABEL)}
609
  />
610
- <InfoIconWithTooltip tooltip={HELP_TEXTS.chatTemplate} />
611
  </Stack>
612
- </Grid>
613
 
614
- <Grid item xs={12} sm={6}>
615
  <FormControl fullWidth>
616
  <InputLabel>{resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.PRECISION.LABEL)}</InputLabel>
617
  <Select
@@ -621,7 +649,7 @@ function ModelSubmissionForm({ user, isAuthenticated }) {
621
  label={resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.PRECISION.LABEL)}
622
  endAdornment={
623
  <InfoIconWithTooltip
624
- tooltip={HELP_TEXTS.precision}
625
  sx={{ mr: 2 }}
626
  />
627
  }
@@ -633,9 +661,9 @@ function ModelSubmissionForm({ user, isAuthenticated }) {
633
  ))}
634
  </Select>
635
  </FormControl>
636
- </Grid>
637
 
638
- <Grid item xs={12} sm={6}>
639
  <FormControl fullWidth>
640
  <InputLabel>{resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.WEIGHTS.LABEL)}</InputLabel>
641
  <Select
@@ -645,7 +673,7 @@ function ModelSubmissionForm({ user, isAuthenticated }) {
645
  label={resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.WEIGHTS.LABEL)}
646
  endAdornment={
647
  <InfoIconWithTooltip
648
- tooltip={HELP_TEXTS.weightsType}
649
  sx={{ mr: 2 }}
650
  />
651
  }
@@ -673,12 +701,12 @@ function ModelSubmissionForm({ user, isAuthenticated }) {
673
  onChange={handleChange}
674
  InputProps={{
675
  endAdornment: (
676
- <InfoIconWithTooltip tooltip={HELP_TEXTS.baseModel} />
677
  ),
678
  }}
679
  />
680
  </Grid>
681
- )}
682
 
683
  {/* Submit Button */}
684
  <Grid item xs={12}>
 
24
  import { MODEL_TYPES } from "../../../LeaderboardPage/components/Leaderboard/constants/modelTypes";
25
  import { SUBMISSION_PRECISIONS } from "../../../LeaderboardPage/components/Leaderboard/constants/defaults";
26
  import AuthContainer from "../../../../components/shared/AuthContainer";
27
+ import { resolveLocalizedString, useResolveLocalizedString } from "i18n";
28
 
29
  const WEIGHT_TYPES = [
30
  { value: "Original", label: "Original" },
 
33
  ];
34
 
35
  const HELP_TEXTS = {
36
+ modelName: () => (
37
  <Box sx={{ p: 1 }}>
38
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
39
+ {resolveLocalizedString({"en": "Model Name on Hugging Face Hub", "fr": "Nom du modèle sur le Hub Hugging Face"})}
40
  </Typography>
41
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
42
+ {
43
+ resolveLocalizedString({
44
+ "en": "Your model must be public and loadable with AutoClasses without trust_remote_code. The model should be in Safetensors format for better safety and loading performance. Example: mistralai/Mistral-7B-v0.1",
45
+ "fr": "Votre modèle doit être public et chargeable avec AutoClasses sans trust_remote_dode. Le modèle doit être dans le format Safetensors pour une meilleure sécurité et une meilleure performace de chargement. Exemple: mistralai/Mistral-7B-v0.1"
46
+ })
47
+ }
48
  </Typography>
49
  </Box>
50
  ),
51
+ revision: () => (
52
  <Box sx={{ p: 1 }}>
53
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
54
+ {resolveLocalizedString({"en": "Model Revision", "fr": "Révision du modèle"})}
55
  </Typography>
56
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
57
+ {
58
+ resolveLocalizedString({
59
+ "en": "Git branch, tag or commit hash. The evaluation will be strictly tied to this specific commit to ensure consistency. Make sure this version is stable and contains all necessary files.",
60
+ "fr": "Branche, tag ou hash de commit git. L'évaluation sera strictement liée à ce commit spécifique pour assurer une consistence. Assurez vous que cette version est stable et contient tous les fichiers nécessaires. "
61
+ })
62
+ }
63
  </Typography>
64
  </Box>
65
  ),
66
+ modelType: () => (
67
  <Box sx={{ p: 1 }}>
68
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
69
+ {resolveLocalizedString({"en": "Model Category", "fr": "Catégorie de modèle"})}
70
  </Typography>
71
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
72
+ {
73
+ resolveLocalizedString({
74
+ "en": "🟢 Pretrained: Base models trained on text using masked modeling 🟩 Continuously Pretrained: Extended training on additional corpus 🔶 Fine-tuned: Domain-specific optimization 💬 Chat: Models using RLHF, DPO, or IFT for conversation 🤝 Merge: Combined weights without additional training 🌸 Multimodal: Handles multiple input types",
75
+ "fr": "🟢 Pretrained: Modèles de base entrainés sur du texte en utilisant du modeling masqué 🟩 Continuously Pretrained: Entrainement étendu sur un corpus additionel 🔶 Fine-tuned: Optimisation spéficique à un domaine 💬 Chat: Modèles utilisant RLHF, DPO, ou IFT pour de la conversation 🤝 Merge: Pondérations combinés sans entrainement additionel 🌸 Multimodal: Gère plusieurs types d'entrée"
76
+ })
77
+ }
78
  </Typography>
79
  </Box>
80
  ),
81
+ baseModel: () => (
82
  <Box sx={{ p: 1 }}>
83
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
84
+ {resolveLocalizedString({"en": "Base Model Reference", "fr": "Référence du modèle de base"})}
85
  </Typography>
86
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
87
+ {
88
+ resolveLocalizedString({
89
+ "en": "Required for delta weights or adapters. This information is used to identify the original model and calculate the total parameter count by combining base model and adapter/delta parameters.",
90
+ "fr": "Nécessaire pour les delta weights ou les adapteurs. Cette information est utilisée pour identifier le modèle original est calculer le nombre total de paramètres en combinant les paramètres du modèles de base et les paramètres adatpeur/delta."
91
+ })
92
+ }
93
  </Typography>
94
  </Box>
95
  ),
96
+ precision: () => (
97
  <Box sx={{ p: 1 }}>
98
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
99
+ {resolveLocalizedString({
100
+ "en": "Model Precision", "fr": "Précision du modèle"
101
+ })}
102
  </Typography>
103
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
104
+ {
105
+ resolveLocalizedString({
106
+ "en": "Size limits vary by precision: • FP16/BF16: up to 100B parameters • 8-bit: up to 280B parameters (2x) • 4-bit: up to 560B parameters (4x) Choose carefully as incorrect precision can cause evaluation errors.",
107
+ "fr": "Les limites de taille varient en fonction de la précision: • FP16/BF16: jusqu'à 100B paramètres • 8-bit: jusqu'à 280B paramètres (2x) • 4-bit: jusqu'à 560B paramètres (4x) Choisissez avec attention: une précision incorrecte peut provoquer des erreurs d'évaluation."
108
+ })
109
+ }
110
  </Typography>
111
  </Box>
112
  ),
113
+ weightsType: () => (
114
  <Box sx={{ p: 1 }}>
115
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
116
+ {resolveLocalizedString({
117
+ "en": "Weights Format",
118
+ "fr": "Format de pondération"
119
+ })}
120
  </Typography>
121
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
122
+ {
123
+ resolveLocalizedString({
124
+ "en": "Original: Complete model weights in safetensors format Delta: Weight differences from base model (requires base model for size calculation) Adapter: Lightweight fine-tuning layers (requires base model for size calculation)",
125
+ "fr": "Original: modèle de pondération complet en format safetensors, Delta: Différences de poids du modèle de base (nécessite un modèle de base pour calcul de la taille), Adapteur: Couches d'affinage légères (requeirt un modèle de base pour le calcul de la taille)"
126
+ })
127
+ }
128
  </Typography>
129
  </Box>
130
  ),
131
+ chatTemplate: () => (
132
  <Box sx={{ p: 1 }}>
133
  <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 0.5 }}>
134
+ {resolveLocalizedString(
135
+ {
136
+ "en": "Chat Template Support",
137
+ "fr": "Support de template de chat"
138
+ }
139
+ )}
140
  </Typography>
141
  <Typography variant="body2" sx={{ opacity: 0.9, lineHeight: 1.4 }}>
142
+ {
143
+ resolveLocalizedString({
144
+ "en": "Activates automatically for chat models. It uses the standardized Hugging Face chat template for consistent prompt formatting during evaluation. Required for models using RLHF, DPO, or instruction fine-tuning.",
145
+ "fr": "S'active automatiquement pour les modèles de chat. Utilise le template Hugging Face standardisé pour un formattage de prompt consistant durant l'évaluation. Nécessaire pour les modèles utilisant RLHF, DPO, ou un affinage d'instruction."
146
+ })
147
+ }
148
  </Typography>
149
  </Box>
150
  ),
 
286
  modelType: "fine-tuned",
287
  isChatModel: false,
288
  useChatTemplate: false,
289
+ precision: "",
290
+ weightsType: "",
291
  baseModel: "",
292
  });
293
  const [error, setError] = useState(null);
 
549
  <Grid item xs={12}>
550
  <Stack direction="row" spacing={1} alignItems="center">
551
  <Typography variant="h6">{resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_INFO.TITLE)}</Typography>
552
+ <InfoIconWithTooltip tooltip={HELP_TEXTS.modelName()} />
553
  </Stack>
554
  </Grid>
555
 
 
565
  helperText="Example: meta-llama/Llama-3.2-1B"
566
  InputProps={{
567
  endAdornment: (
568
+ <InfoIconWithTooltip tooltip={HELP_TEXTS.modelName()} />
569
  ),
570
  }}
571
  />
 
581
  helperText={resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_INFO.REVISION.HELPER_TEXT)}
582
  InputProps={{
583
  endAdornment: (
584
+ <InfoIconWithTooltip tooltip={HELP_TEXTS.revision()} />
585
  ),
586
  }}
587
  />
 
604
  label={resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.TYPE.LABEL)}
605
  endAdornment={
606
  <InfoIconWithTooltip
607
+ tooltip={HELP_TEXTS.modelType()}
608
  sx={{ mr: 2 }}
609
  />
610
  }
 
618
  </FormControl>
619
  </Grid>
620
 
621
+ {/* <Grid item xs={12} sm={6}>
622
  <Stack
623
  direction="row"
624
  spacing={2}
 
635
  }
636
  label={resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.TEMPLATE.LABEL)}
637
  />
638
+ <InfoIconWithTooltip tooltip={HELP_TEXTS.chatTemplate()} />
639
  </Stack>
640
+ </Grid> */}
641
 
642
+ {/* <Grid item xs={12} sm={6}>
643
  <FormControl fullWidth>
644
  <InputLabel>{resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.PRECISION.LABEL)}</InputLabel>
645
  <Select
 
649
  label={resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.PRECISION.LABEL)}
650
  endAdornment={
651
  <InfoIconWithTooltip
652
+ tooltip={HELP_TEXTS.precision()}
653
  sx={{ mr: 2 }}
654
  />
655
  }
 
661
  ))}
662
  </Select>
663
  </FormControl>
664
+ </Grid> */}
665
 
666
+ {/* <Grid item xs={12} sm={6}>
667
  <FormControl fullWidth>
668
  <InputLabel>{resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.WEIGHTS.LABEL)}</InputLabel>
669
  <Select
 
673
  label={resolveLocalizedString(INTERNATIONALISATION.FORM.MODEL_CONFIG.WEIGHTS.LABEL)}
674
  endAdornment={
675
  <InfoIconWithTooltip
676
+ tooltip={HELP_TEXTS.weightsType()}
677
  sx={{ mr: 2 }}
678
  />
679
  }
 
701
  onChange={handleChange}
702
  InputProps={{
703
  endAdornment: (
704
+ <InfoIconWithTooltip tooltip={HELP_TEXTS.baseModel()} />
705
  ),
706
  }}
707
  />
708
  </Grid>
709
+ )} */}
710
 
711
  {/* Submit Button */}
712
  <Grid item xs={12}>
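In ModelSubmissionForm.jsx the HELP_TEXTS entries change from prebuilt JSX elements to zero-argument factories, and every call site now invokes them (HELP_TEXTS.modelName(), HELP_TEXTS.revision(), ...), so resolveLocalizedString runs when the tooltip is rendered rather than once at module load. A condensed sketch of the pattern, assuming resolveLocalizedString is imported from "i18n" as in the file:

  import { resolveLocalizedString } from "i18n";

  // Each entry is a factory returning JSX; the localized record is resolved
  // only when the factory is called at render time.
  const HELP_TEXTS = {
    modelName: () => (
      <span>
        {resolveLocalizedString({
          en: "Model Name on Hugging Face Hub",
          fr: "Nom du modèle sur le Hub Hugging Face",
        })}
      </span>
    ),
  };

  // Call site — note the parentheses:
  // <InfoIconWithTooltip tooltip={HELP_TEXTS.modelName()} />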
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/ColumnSelector/ColumnSelector.jsx CHANGED
@@ -8,6 +8,7 @@ import { TABLE_DEFAULTS } from "../../constants/defaults";
8
  import DropdownButton from "../shared/DropdownButton";
9
  import InfoIconWithTooltip from "../../../../../../components/shared/InfoIconWithTooltip";
10
  import { UI_TOOLTIPS } from "../../constants/tooltips";
 
11
 
12
  const FilterGroup = ({ title, children, count, total }) => (
13
  <Box
@@ -64,6 +65,7 @@ const ColumnSelector = ({
64
  }) => {
65
  const { getState, setColumnVisibility } = table;
66
  const { columnVisibility } = getState();
 
67
 
68
  // Filter columns to only show filterable ones
69
  const filterableColumns = [
@@ -104,9 +106,11 @@ const ColumnSelector = ({
104
  onColumnVisibilityChange?.(newVisibility);
105
  };
106
 
 
 
107
  return (
108
  <DropdownButton
109
- label="column visibility"
110
  icon={ViewColumnIcon}
111
  closeIcon={CloseIcon}
112
  hasChanges={hasChanges}
@@ -126,10 +130,10 @@ const ColumnSelector = ({
126
  >
127
  <Box sx={{ display: "flex", alignItems: "center", gap: 0.5 }}>
128
  <Typography variant="body2" sx={{ fontWeight: 600 }}>
129
- Column Visibility
130
  </Typography>
131
  <InfoIconWithTooltip
132
- tooltip={UI_TOOLTIPS.COLUMN_SELECTOR}
133
  iconProps={{ sx: { fontSize: "1rem" } }}
134
  />
135
  </Box>
@@ -165,16 +169,16 @@ const ColumnSelector = ({
165
  display: { xs: "none", sm: "block" },
166
  }}
167
  >
168
- Reset
169
  </Typography>
170
  </Box>
171
  </Box>
172
 
173
  {Object.entries(TABLE_DEFAULTS.COLUMNS.COLUMN_GROUPS).map(
174
- ([groupTitle, columns]) => {
175
  // Calculer le nombre de colonnes cochées pour les évaluations
176
- const isEvalGroup = groupTitle === "Evaluation Scores";
177
- const filteredColumns = columns.filter((col) =>
178
  filterableColumns.includes(col)
179
  );
180
  const checkedCount = isEvalGroup
@@ -184,8 +188,8 @@ const ColumnSelector = ({
184
 
185
  return (
186
  <FilterGroup
187
- key={groupTitle}
188
- title={groupTitle}
189
  count={checkedCount}
190
  total={totalCount}
191
  >
 
8
  import DropdownButton from "../shared/DropdownButton";
9
  import InfoIconWithTooltip from "../../../../../../components/shared/InfoIconWithTooltip";
10
  import { UI_TOOLTIPS } from "../../constants/tooltips";
11
+ import { resolveLocalizedString, useResolveLocalizedString } from "i18n";
12
 
13
  const FilterGroup = ({ title, children, count, total }) => (
14
  <Box
 
65
  }) => {
66
  const { getState, setColumnVisibility } = table;
67
  const { columnVisibility } = getState();
68
+ const {resolveLocalizedString} = useResolveLocalizedString();
69
 
70
  // Filter columns to only show filterable ones
71
  const filterableColumns = [
 
106
  onColumnVisibilityChange?.(newVisibility);
107
  };
108
 
109
+ const label = resolveLocalizedString({"en": "column visibility", "fr": "affichage colonnes"});
110
+
111
  return (
112
  <DropdownButton
113
+ label={label}
114
  icon={ViewColumnIcon}
115
  closeIcon={CloseIcon}
116
  hasChanges={hasChanges}
 
130
  >
131
  <Box sx={{ display: "flex", alignItems: "center", gap: 0.5 }}>
132
  <Typography variant="body2" sx={{ fontWeight: 600 }}>
133
+ {resolveLocalizedString({"en": "Column Visibility", "fr": "Visibilité des colonnes"})}
134
  </Typography>
135
  <InfoIconWithTooltip
136
+ tooltip={resolveLocalizedString(UI_TOOLTIPS.COLUMN_SELECTOR)}
137
  iconProps={{ sx: { fontSize: "1rem" } }}
138
  />
139
  </Box>
 
169
  display: { xs: "none", sm: "block" },
170
  }}
171
  >
172
+ {resolveLocalizedString({"en": "Reset", "fr": "Réinitialiser"})}
173
  </Typography>
174
  </Box>
175
  </Box>
176
 
177
  {Object.entries(TABLE_DEFAULTS.COLUMNS.COLUMN_GROUPS).map(
178
+ ([key, info]) => {
179
  // Calculer le nombre de colonnes cochées pour les évaluations
180
+ const isEvalGroup = key === "eval_scores";
181
+ const filteredColumns = info.items.filter((col) =>
182
  filterableColumns.includes(col)
183
  );
184
  const checkedCount = isEvalGroup
 
188
 
189
  return (
190
  <FilterGroup
191
+ key={key}
192
+ title={resolveLocalizedString(info.displayName)}
193
  count={checkedCount}
194
  total={totalCount}
195
  >
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/DisplayOptions/DisplayOptions.jsx CHANGED
@@ -13,6 +13,7 @@ import {
13
  import { UI_TOOLTIPS } from "../../constants/tooltips";
14
  import DropdownButton from "../shared/DropdownButton";
15
  import InfoIconWithTooltip from "../../../../../../components/shared/InfoIconWithTooltip";
 
16
 
17
  const TableOptions = ({
18
  rowSize,
@@ -43,15 +44,18 @@ const TableOptions = ({
43
  setSearchParams(newParams);
44
  };
45
 
 
 
46
  return (
47
  <DropdownButton
48
- label="table options"
49
  icon={TuneIcon}
50
  closeIcon={CloseIcon}
51
  hasChanges={hasChanges}
52
  loading={loading}
53
  defaultWidth={260}
54
- tooltip={UI_TOOLTIPS.DISPLAY_OPTIONS}
 
55
  >
56
  <Box
57
  sx={{
@@ -66,10 +70,10 @@ const TableOptions = ({
66
  >
67
  <Box sx={{ display: "flex", alignItems: "center", gap: 0.5 }}>
68
  <Typography variant="body2" sx={{ fontWeight: 600 }}>
69
- Table Options
70
  </Typography>
71
  <InfoIconWithTooltip
72
- tooltip={UI_TOOLTIPS.DISPLAY_OPTIONS}
73
  iconProps={{ sx: { fontSize: "1rem", ml: 0.5 } }}
74
  />
75
  </Box>
@@ -104,7 +108,7 @@ const TableOptions = ({
104
  display: { xs: "none", sm: "block" },
105
  }}
106
  >
107
- Reset
108
  </Typography>
109
  </Box>
110
  </Box>
@@ -120,10 +124,10 @@ const TableOptions = ({
120
  }}
121
  >
122
  <Typography variant="subtitle2">
123
- {UI_TOOLTIPS.ROW_SIZE.title}
124
  </Typography>
125
  <InfoIconWithTooltip
126
- tooltip={UI_TOOLTIPS.ROW_SIZE.description}
127
  iconProps={{ sx: { fontSize: "1rem", ml: 0.5 } }}
128
  />
129
  </Box>
@@ -149,10 +153,10 @@ const TableOptions = ({
149
  }}
150
  >
151
  <Typography variant="subtitle2">
152
- {UI_TOOLTIPS.SCORE_DISPLAY.title}
153
  </Typography>
154
  <InfoIconWithTooltip
155
- tooltip={UI_TOOLTIPS.SCORE_DISPLAY.description}
156
  iconProps={{ sx: { fontSize: "1rem", ml: 0.5 } }}
157
  />
158
  </Box>
@@ -160,7 +164,7 @@ const TableOptions = ({
160
  {SCORE_DISPLAY_OPTIONS.map(({ value, label }) => (
161
  <FilterTag
162
  key={value}
163
- label={label}
164
  checked={scoreDisplay === value}
165
  onChange={() => onScoreDisplayChange(value)}
166
  variant="tag"
@@ -178,10 +182,10 @@ const TableOptions = ({
178
  }}
179
  >
180
  <Typography variant="subtitle2">
181
- {UI_TOOLTIPS.RANKING_MODE.title}
182
  </Typography>
183
  <InfoIconWithTooltip
184
- tooltip={UI_TOOLTIPS.RANKING_MODE.description}
185
  iconProps={{ sx: { fontSize: "1rem", ml: 0.5 } }}
186
  />
187
  </Box>
@@ -189,7 +193,7 @@ const TableOptions = ({
189
  {RANKING_MODE_OPTIONS.map(({ value, label }) => (
190
  <FilterTag
191
  key={value}
192
- label={label}
193
  checked={rankingMode === value}
194
  onChange={() => onRankingModeChange(value)}
195
  variant="tag"
@@ -207,22 +211,22 @@ const TableOptions = ({
207
  }}
208
  >
209
  <Typography variant="subtitle2">
210
- {UI_TOOLTIPS.AVERAGE_SCORE.title}
211
  </Typography>
212
  <InfoIconWithTooltip
213
- tooltip={UI_TOOLTIPS.AVERAGE_SCORE.description}
214
  iconProps={{ sx: { fontSize: "1rem", ml: 0.5 } }}
215
  />
216
  </Box>
217
  <Box sx={{ display: "flex", gap: 1 }}>
218
  <FilterTag
219
- label="All Scores"
220
  checked={averageMode === "all"}
221
  onChange={() => onAverageModeChange("all")}
222
  variant="tag"
223
  />
224
  <FilterTag
225
- label="Visible Only"
226
  checked={averageMode === "visible"}
227
  onChange={() => onAverageModeChange("visible")}
228
  variant="tag"
 
13
  import { UI_TOOLTIPS } from "../../constants/tooltips";
14
  import DropdownButton from "../shared/DropdownButton";
15
  import InfoIconWithTooltip from "../../../../../../components/shared/InfoIconWithTooltip";
16
+ import { useResolveLocalizedString } from "i18n";
17
 
18
  const TableOptions = ({
19
  rowSize,
 
44
  setSearchParams(newParams);
45
  };
46
 
47
+ const {resolveLocalizedString} = useResolveLocalizedString();
48
+
49
  return (
50
  <DropdownButton
51
+ label={resolveLocalizedString({"en": "table options", "fr": "options tableau"})}
52
  icon={TuneIcon}
53
  closeIcon={CloseIcon}
54
  hasChanges={hasChanges}
55
  loading={loading}
56
  defaultWidth={260}
57
+ tooltip={UI_TOOLTIPS.DISPLAY_OPTIONS()}
58
+ smallWidth
59
  >
60
  <Box
61
  sx={{
 
70
  >
71
  <Box sx={{ display: "flex", alignItems: "center", gap: 0.5 }}>
72
  <Typography variant="body2" sx={{ fontWeight: 600 }}>
73
+ {resolveLocalizedString({"en": "Table Options", "fr": "Options"})}
74
  </Typography>
75
  <InfoIconWithTooltip
76
+ tooltip={UI_TOOLTIPS.DISPLAY_OPTIONS()}
77
  iconProps={{ sx: { fontSize: "1rem", ml: 0.5 } }}
78
  />
79
  </Box>
 
108
  display: { xs: "none", sm: "block" },
109
  }}
110
  >
111
+ {resolveLocalizedString({"en": "Reset", "fr": "Réinitialiser"})}
112
  </Typography>
113
  </Box>
114
  </Box>
 
124
  }}
125
  >
126
  <Typography variant="subtitle2">
127
+ {resolveLocalizedString(UI_TOOLTIPS.ROW_SIZE.title)}
128
  </Typography>
129
  <InfoIconWithTooltip
130
+ tooltip={resolveLocalizedString(UI_TOOLTIPS.ROW_SIZE.description)}
131
  iconProps={{ sx: { fontSize: "1rem", ml: 0.5 } }}
132
  />
133
  </Box>
 
153
  }}
154
  >
155
  <Typography variant="subtitle2">
156
+ {resolveLocalizedString(UI_TOOLTIPS.SCORE_DISPLAY.title)}
157
  </Typography>
158
  <InfoIconWithTooltip
159
+ tooltip={resolveLocalizedString(UI_TOOLTIPS.SCORE_DISPLAY.description)}
160
  iconProps={{ sx: { fontSize: "1rem", ml: 0.5 } }}
161
  />
162
  </Box>
 
164
  {SCORE_DISPLAY_OPTIONS.map(({ value, label }) => (
165
  <FilterTag
166
  key={value}
167
+ label={resolveLocalizedString(label)}
168
  checked={scoreDisplay === value}
169
  onChange={() => onScoreDisplayChange(value)}
170
  variant="tag"
 
182
  }}
183
  >
184
  <Typography variant="subtitle2">
185
+ {resolveLocalizedString(UI_TOOLTIPS.RANKING_MODE.title)}
186
  </Typography>
187
  <InfoIconWithTooltip
188
+ tooltip={resolveLocalizedString(UI_TOOLTIPS.RANKING_MODE.description)}
189
  iconProps={{ sx: { fontSize: "1rem", ml: 0.5 } }}
190
  />
191
  </Box>
 
193
  {RANKING_MODE_OPTIONS.map(({ value, label }) => (
194
  <FilterTag
195
  key={value}
196
+ label={resolveLocalizedString(label)}
197
  checked={rankingMode === value}
198
  onChange={() => onRankingModeChange(value)}
199
  variant="tag"
 
211
  }}
212
  >
213
  <Typography variant="subtitle2">
214
+ {resolveLocalizedString(UI_TOOLTIPS.AVERAGE_SCORE.title)}
215
  </Typography>
216
  <InfoIconWithTooltip
217
+ tooltip={resolveLocalizedString(UI_TOOLTIPS.AVERAGE_SCORE.description)}
218
  iconProps={{ sx: { fontSize: "1rem", ml: 0.5 } }}
219
  />
220
  </Box>
221
  <Box sx={{ display: "flex", gap: 1 }}>
222
  <FilterTag
223
+ label={resolveLocalizedString({"en": "All Scores", "fr": "Tous"})}
224
  checked={averageMode === "all"}
225
  onChange={() => onAverageModeChange("all")}
226
  variant="tag"
227
  />
228
  <FilterTag
229
+ label={resolveLocalizedString({"en": "Visible Only", "fr": "Visibles"})}
230
  checked={averageMode === "visible"}
231
  onChange={() => onAverageModeChange("visible")}
232
  variant="tag"
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Filters/SearchBar.jsx CHANGED
@@ -314,7 +314,7 @@ const SearchBar = ({
314
  </Box> */}
315
 
316
  <InfoIconWithTooltip
317
- tooltip={UI_TOOLTIPS.SEARCH_BAR}
318
  iconProps={{
319
  sx: { fontSize: "1.2rem", display: { xs: "none", md: "block" } },
320
  }}
 
314
  </Box> */}
315
 
316
  <InfoIconWithTooltip
317
+ tooltip={UI_TOOLTIPS.SEARCH_BAR()}
318
  iconProps={{
319
  sx: { fontSize: "1.2rem", display: { xs: "none", md: "block" } },
320
  }}
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/shared/DropdownButton.jsx CHANGED
@@ -13,6 +13,7 @@ const DropdownButton = ({
13
  paperProps = {},
14
  buttonSx = {},
15
  loading = false,
 
16
  }) => {
17
  const theme = useTheme();
18
  const [anchorEl, setAnchorEl] = useState(null);
@@ -34,7 +35,7 @@ const DropdownButton = ({
34
  <Skeleton
35
  variant="rounded"
36
  sx={{
37
- width: label === "table options" ? 120 : 140,
38
  height: 32,
39
  transform: "none",
40
  borderRadius: 1,
 
13
  paperProps = {},
14
  buttonSx = {},
15
  loading = false,
16
+ smallWidth = false,
17
  }) => {
18
  const theme = useTheme();
19
  const [anchorEl, setAnchorEl] = useState(null);
 
35
  <Skeleton
36
  variant="rounded"
37
  sx={{
38
+ width: smallWidth ? 120 : 140,
39
  height: 32,
40
  transform: "none",
41
  borderRadius: 1,
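DropdownButton previously guessed its loading-skeleton width by comparing the label against the hard-coded English string "table options"; with localized labels that comparison no longer holds, so an explicit smallWidth prop carries the intent instead. In condensed form, the DisplayOptions call site above becomes (a fragment for illustration, assuming the surrounding imports from that file):

  <DropdownButton
    label={resolveLocalizedString({ en: "table options", fr: "options tableau" })}
    icon={TuneIcon}
    closeIcon={CloseIcon}
    smallWidth          // 120px skeleton instead of the 140px default
  >
    {/* dropdown content */}
  </DropdownButton>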
frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/defaults.tsx CHANGED
@@ -18,12 +18,12 @@ const DISPLAY = {
18
  large: 60,
19
  },
20
  SCORE_DISPLAY_OPTIONS: [
21
- { value: "normalized", label: "Normalized" },
22
- { value: "raw", label: "Raw" },
23
  ],
24
  RANKING_MODE_OPTIONS: [
25
- { value: "static", label: "Static" },
26
- { value: "dynamic", label: "Dynamic" },
27
  ],
28
  };
29
 
@@ -268,7 +268,7 @@ const ALL_COLUMNS = {
268
  const COLUMN_DEFINITIONS = {
269
  ALL_COLUMNS,
270
  COLUMN_GROUPS: {
271
- "Evaluation Scores": Object.keys(COLUMNS.EVALUATION),
272
  // "Model Information": Object.keys(COLUMNS.MODEL_INFO),
273
  // "Additional Information": Object.keys(COLUMNS.ADDITIONAL_INFO),
274
  },
 
18
  large: 60,
19
  },
20
  SCORE_DISPLAY_OPTIONS: [
21
+ { value: "normalized", label: {"en": "Normalized", "fr": "Normalisé"} },
22
+ { value: "raw", label: {"en": "Raw", "fr": "Bruts"} },
23
  ],
24
  RANKING_MODE_OPTIONS: [
25
+ { value: "static", label: {"en": "Static", "fr": "Statique"} },
26
+ { value: "dynamic", label: {"en": "Dynamic", "fr": "Dynamique"} },
27
  ],
28
  };
29
 
 
268
  const COLUMN_DEFINITIONS = {
269
  ALL_COLUMNS,
270
  COLUMN_GROUPS: {
271
+ "eval_scores": {displayName: {"en": "Evaluation Scores", "fr": "Scores d'évaluation"}, items: Object.keys(COLUMNS.EVALUATION)},
272
  // "Model Information": Object.keys(COLUMNS.MODEL_INFO),
273
  // "Additional Information": Object.keys(COLUMNS.ADDITIONAL_INFO),
274
  },
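COLUMN_GROUPS entries move from a display string keyed directly to a column list ("Evaluation Scores": [...]) to a stable key carrying a localized displayName plus an items array, which is the shape the ColumnSelector changes above iterate over. A short sketch of walking the new structure, assuming TABLE_DEFAULTS and resolveLocalizedString are in scope as in ColumnSelector.jsx:

  Object.entries(TABLE_DEFAULTS.COLUMNS.COLUMN_GROUPS).forEach(([key, info]) => {
    // key is a stable identifier such as "eval_scores"; the user-facing title
    // is a localized { en, fr } record, and items lists the column ids.
    const title = resolveLocalizedString(info.displayName);
    console.log(key, title, info.items.length);
  });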
frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/tooltips.jsx CHANGED
@@ -1,21 +1,23 @@
1
  import { Box, Typography } from "@mui/material";
 
2
 
3
  const createTooltipContent = (title, items) => (
 
4
  <Box sx={{ maxWidth: 400 }}>
5
  <Typography variant="body2" paragraph sx={{ mb: 1, color: "inherit" }}>
6
- {title}
7
  </Typography>
8
  <Box component="ul" sx={{ m: 0, pl: 2 }}>
9
  {items.map(({ label, description, subItems }, index) => (
10
  <li key={index}>
11
  <Typography variant="body2" sx={{ mb: 0.5, color: "inherit" }}>
12
- <b>{label}</b>: {description}
13
  {subItems && (
14
  <Box component="ul" sx={{ mt: 0.5, mb: 1 }}>
15
  {subItems.map((item, subIndex) => (
16
  <li key={subIndex}>
17
  <Typography variant="body2" sx={{ color: "inherit" }}>
18
- {item}
19
  </Typography>
20
  </li>
21
  ))}
@@ -29,279 +31,334 @@ const createTooltipContent = (title, items) => (
29
  );
30
 
31
  export const COLUMN_TOOLTIPS = {
32
- AVERAGE: createTooltipContent("Average score across all benchmarks:", [
 
 
 
33
  {
34
- label: "Calculation",
35
- description: "Weighted average of normalized scores from all benchmarks",
 
 
 
 
 
 
36
  subItems: [
37
- "Each benchmark is normalized to a 0-100 scale",
38
- "All normalised benchmarks are then averaged together",
 
 
 
 
 
 
39
  ],
40
  },
41
  ]),
42
 
43
- IFEVALFR: createTooltipContent("Instruction-Following Evaluation (IFEval):", [
 
 
 
44
  {
45
- label: "Purpose",
46
  description:
47
- "Tests model's ability to follow explicit formatting instructions",
48
- subItems: ["Instruction following", "Formatting", "Generation"],
 
 
 
 
 
 
 
49
  },
50
  {
51
- label: "Scoring: Accuracy",
52
- description: "Was the format asked for strictly respected.",
53
  },
54
  ]),
55
 
56
- GPQAFR: createTooltipContent("Graduate-Level Google-Proof Q&A (GPQA):", [
57
  {
58
- label: "Focus",
59
- description: "PhD-level knowledge multiple choice questions in science",
60
  subItems: [
61
- "Chemistry",
62
- "Biology",
63
- "Physics",
64
  ],
65
  },
66
  {
67
- label: "Scoring: Accuracy",
68
  description:
69
- "Was the correct choice selected among the options.",
70
  },
71
  ]),
72
 
73
- BACFR: createTooltipContent("French Baccalauréat (BAC FR):", [
74
  {
75
- label: "Scope",
76
- description: "more than 700 question from the baccalauréat exam (end of high school national French exam), including the followong subjects : maths, computer science and physics-chemestry. The extracted questions are multiple choice, code completation or exact solution answer (with a tolerence degree).",
 
 
 
77
  },
78
  {
79
- label: "Scoring: Accuracy",
 
80
  description:
81
- "(Prefix quasi-exact match) Was the solution generated correct and in the expected format.",
 
 
 
82
  },
83
  ]),
84
 
85
- ARCHITECTURE: createTooltipContent("Model Architecture Information:", [
86
- {
87
- label: "Definition",
88
- description: "The fundamental structure and design of the model",
89
- subItems: [
90
- "Pretrained: Foundational models, initially trained on large datasets without task-specific tuning, serving as a versatile base for further development.",
91
- "Continuously Pretrained: Base models trained with a data mix evolving as the model is trained, with the addition of specialized data during the last training steps.",
92
- "Fine-tuned: Base models, fine-tuned on specialised domain data (legal, medical, ...), and optimized for particular tasks.",
93
- "Chat: Models fine-tuned with IFT, RLHF, DPO, and other techniques, to handle conversational contexts effectively.",
94
- "Merged: Combining multiple models through weights averaging or similar methods.",
95
- "Multimodal: Models which can handle several modalities (text & image/audio/video/...). We only evaluate the text capabilities.",
96
- ],
97
- },
98
- {
99
- label: "Impact",
100
- description: "How architecture affects model capabilities",
101
- subItems: [
102
- "Base models are expected to perform less well on instruction following evaluations, like IFEval.",
103
- "Fine-tuned and chat models can be more verbose and more chatty than base models.",
104
- "Merged models tend to exhibit good performance on benchmarks, which do not translate to real-world situations.",
105
- ],
106
- },
107
- ]),
108
 
109
- PRECISION: createTooltipContent("Numerical Precision Format:", [
110
- {
111
- label: "Overview",
112
- description:
113
- "Data format used to store model weights and perform computations",
114
- subItems: [
115
- "bfloat16: Half precision (Brain Float format), good for stability",
116
- "float16: Half precision",
117
- "8bit/4bit: Quantized formats, for efficiency",
118
- "GPTQ/AWQ: Quantized methods",
119
- ],
120
- },
121
- {
122
- label: "Impact",
123
- description: "How precision affects model deployment",
124
- subItems: [
125
- "Higher precision = better accuracy but more memory usage",
126
- "Lower precision = faster inference and smaller size",
127
- "Trade-off between model quality and resource usage",
128
- ],
129
- },
130
- ]),
131
 
132
- FLAGS: createTooltipContent("Model Flags and Special Features:", [
133
- {
134
- label: "Filters",
135
- subItems: [
136
- "Mixture of Expert: Uses a MoE architecture",
137
- "Merged models: Created by averaging other models",
138
- "Contaminated: Flagged by users from the community for (possibly accidental) cheating",
139
- "Unavailable: No longer on the hub (private, deleted) or missing a license tag",
140
- ],
141
- },
142
- {
143
- label: "Purpose",
144
- description: "Why do people want to hide these models?",
145
- subItems: [
146
- "Mixture of Experts: These models can be too parameter heavy",
147
- "Merged models: Performance on benchmarks tend to be inflated compared to real life usage",
148
- "Contaminated: Performance on benchmarks is inflated and not reflecting real life usage",
149
- ],
150
- },
151
- ]),
152
 
153
- PARAMETERS: createTooltipContent("Model Parameters:", [
154
- {
155
- label: "Measurement",
156
- description: "Total number of trainable parameters in billions",
157
- subItems: [
158
- "Indicates model capacity and complexity",
159
- "Correlates with computational requirements",
160
- "Influences memory usage and inference speed",
161
- ],
162
- },
163
- ]),
164
 
165
- LICENSE: createTooltipContent("Model License Information:", [
166
- {
167
- label: "Importance",
168
- description: "Legal terms governing model usage and distribution",
169
- subItems: [
170
- "Commercial vs non-commercial use",
171
- "Attribution requirements",
172
- "Modification and redistribution rights",
173
- "Liability and warranty terms",
174
- ],
175
- },
176
- ]),
177
 
178
- CO2_COST: createTooltipContent("Carbon Dioxide Emissions:", [
179
- {
180
- label: "What is it?",
181
- description: "CO₂ emissions of the model evaluation ",
182
- subItems: [
183
- "Only focuses on model inference for our specific setup",
184
- "Considers data center location and energy mix",
185
- "Allows equivalent comparision of models on our use case",
186
- ],
187
- },
188
- {
189
- label: "Why it matters",
190
- description: "Environmental impact of AI model training",
191
- subItems: [
192
- "Large models can have significant carbon footprints",
193
- "Helps make informed choices about model selection",
194
- ],
195
- },
196
- {
197
- label: "Learn more",
198
- description:
199
- "For detailed information about our CO₂ calculation methodology, visit:",
200
- subItems: [
201
- <a
202
- href="https://huggingface.co/docs/leaderboards/open_llm_leaderboard/emissions"
203
- target="_blank"
204
- rel="noopener noreferrer"
205
- style={{ color: "#90caf9" }}
206
- >
207
- Carbon Emissions Documentation ↗
208
- </a>,
209
- ],
210
- },
211
- ]),
212
  };
213
 
214
  export const UI_TOOLTIPS = {
215
- COLUMN_SELECTOR: "Choose which columns to display in the table",
216
- DISPLAY_OPTIONS: createTooltipContent("Table Display Options", [
217
  {
218
- label: "Overview",
219
- description: "Configure how the table displays data and information",
220
  subItems: [
221
- "Row size and layout",
222
- "Score display format",
223
- "Ranking calculation",
224
- "Average score computation",
225
  ],
226
  },
227
  ]),
228
- SEARCH_BAR: createTooltipContent("Advanced Model Search", [
 
 
 
229
  {
230
- label: "Name Search",
231
- description: "Search directly by model name",
 
 
 
 
 
 
232
  subItems: [
233
- "Supports regular expressions (e.g., ^mistral.*7b)",
234
- "Case sensitive",
 
 
 
 
 
 
235
  ],
236
  },
237
  {
238
- label: "Field Search",
239
- description: "Use @field:value syntax for precise filtering",
240
  subItems: [
241
- "@architecture:llama - Filter by architecture",
242
- "@license:mit - Filter by license",
243
- "@precision:float16 - Filter by precision",
244
- "@type:chat - Filter by model type",
245
  ],
246
  },
247
  {
248
- label: "Multiple Searches",
249
- description: "Combine multiple criteria using semicolons",
250
  subItems: [
251
- "meta @license:mit; @architecture:llama",
252
- "^mistral.*7b; @precision:float16",
253
  ],
254
  },
255
  ]),
256
- QUICK_FILTERS: createTooltipContent(
257
- "Filter models based on their size and applicable hardware:",
258
- [
259
- {
260
- label: "Edge devices (Up to 3BB)",
261
- description:
262
- "Efficient models for edge devices, optimized for blazing fast inference.",
263
- },
264
- {
265
- label: "Smol Models (3B-7B)",
266
- description:
267
- "Efficient models for consumer hardware, optimized for fast inference.",
268
- },
269
- {
270
- label: "Mid-range models (7B-65B)",
271
- description:
272
- "A bit of everything here, with overall balanced performance and resource usage around 30B.",
273
- },
274
- {
275
- label: "GPU-rich models (65B+)",
276
- description:
277
- "State-of-the-art performance for complex tasks, requires significant computing power.",
278
- },
279
- {
280
- label: "Official Providers",
281
- description:
282
- "Models directly maintained by their original creators, ensuring reliability and up-to-date performance.",
283
- },
284
- ]
285
- ),
286
  ROW_SIZE: {
287
- title: "Row Size",
288
  description:
289
- "Adjust the height of table rows. Compact is ideal for viewing more data at once, while Large provides better readability and touch targets.",
 
 
290
  },
291
  SCORE_DISPLAY: {
292
- title: "Score Display",
293
  description:
294
- "Choose between normalized scores (0-100% scale for easy comparison) or raw scores (actual benchmark results). Normalized scores help compare performance across different benchmarks, while raw scores show actual benchmark outputs.",
 
 
295
  },
296
  RANKING_MODE: {
297
- title: "Ranking Mode",
298
  description:
299
- "Choose between static ranking (original position in the full leaderboard) or dynamic ranking (position based on current filters and sorting).",
 
 
300
  },
301
  AVERAGE_SCORE: {
302
- title: "Average Score Calculation",
303
  description:
304
- "Define how the average score is calculated. 'All Scores' uses all benchmarks, while 'Visible Only' calculates the average using only the visible benchmark columns.",
 
 
305
  },
306
  };
307
 
 
1
  import { Box, Typography } from "@mui/material";
2
+ import { resolveLocalizedString } from "i18n";
3
 
4
  const createTooltipContent = (title, items) => (
5
+
6
  <Box sx={{ maxWidth: 400 }}>
7
  <Typography variant="body2" paragraph sx={{ mb: 1, color: "inherit" }}>
8
+ {resolveLocalizedString(title)}
9
  </Typography>
10
  <Box component="ul" sx={{ m: 0, pl: 2 }}>
11
  {items.map(({ label, description, subItems }, index) => (
12
  <li key={index}>
13
  <Typography variant="body2" sx={{ mb: 0.5, color: "inherit" }}>
14
+ <b>{resolveLocalizedString(label)}</b>: {resolveLocalizedString(description)}
15
  {subItems && (
16
  <Box component="ul" sx={{ mt: 0.5, mb: 1 }}>
17
  {subItems.map((item, subIndex) => (
18
  <li key={subIndex}>
19
  <Typography variant="body2" sx={{ color: "inherit" }}>
20
+ {resolveLocalizedString(item)}
21
  </Typography>
22
  </li>
23
  ))}
 
31
  );
32
 
33
  export const COLUMN_TOOLTIPS = {
34
+ AVERAGE: () => createTooltipContent({
35
+ "en": "Average score across all benchmarks:",
36
+ "fr": "Score moyen à travers tous les benchmarks"
37
+ }, [
38
  {
39
+ label: {
40
+ "en": "Calculation",
41
+ "fr": "Calcul"
42
+ },
43
+ description: {
44
+ "en": "Weighted average of normalized scores from all benchmarks",
45
+ "fr": "Moyenne pondérée des scores normalisés pour tous les benchmarks"
46
+ },
47
  subItems: [
48
+ {
49
+ "en": "Each benchmark is normalized to a 0-100 scale",
50
+ "fr": "Chage benchmark est normalisé sur une échelle de 0-100"
51
+ },
52
+ {
53
+ "en": "All normalised benchmarks are then averaged together",
54
+ "fr": "Tous les benchmarks normalisés sont ensuite moyennés"
55
+ }
56
  ],
57
  },
58
  ]),
59
 
60
+ IFEVALFR: () => createTooltipContent({
61
+ "en": "Instruction-Following Evaluation (IFEval):",
62
+ "fr": "Instruction-Following Evaluation (IFEval):"
63
+ }, [
64
  {
65
+ label: {"en": "Purpose", "fr": "Objectif"},
66
  description:
67
+ {
68
+ "en": "Tests model's ability to follow explicit formatting instructions",
69
+ "fr": "Teste la capacité du modèle à suivre des intructions de formatage explicites"
70
+ },
71
+ subItems: [
72
+ {"en": "Instruction following", "fr": "Suit les instructions"},
73
+ {"en": "Formatting", "fr": "Formatage"},
74
+ {"en": "Generation", "fr": "Génération"}
75
+ ],
76
  },
77
  {
78
+ label: {"en": "Scoring: Accuracy", "fr": "Notation: Précision"},
79
+ description: {"en": "Was the format asked for strictly respected.", "fr": "Est-ce que le format demandé a été respecté strictement"},
80
  },
81
  ]),
82
 
83
+ GPQAFR: () => createTooltipContent({"en": "Graduate-Level Google-Proof Q&A (GPQA):", "fr": "Graduate-Level Google-Proof Q&A (GPQA):"}, [
84
  {
85
+ label: {"en": "Focus", "fr": "Accent"},
86
+ description: {"en": "PhD-level knowledge multiple choice questions in science", "fr": "Questions à choix multiples de niveau doctorat en science"},
87
  subItems: [
88
+ {"en": "Chemistry", "fr": "Chimie"},
89
+ {"en": "Biology", "fr": "Biologie"},
90
+ {"en": "Physics", "fr": "Physique"},
91
  ],
92
  },
93
  {
94
+ label: {"en": "Scoring: Accuracy", "fr": "Notation: Précision"},
95
  description:
96
+ {"en": "Was the correct choice selected among the options.", "fr": "Est-ce que le bon choix a été sélectionné parmi les options"},
97
  },
98
  ]),
99
 
100
+ BACFR: () => createTooltipContent({"en": "French Baccalauréat (BAC FR):", "fr": "Baccalauréat Français (BAC FR)"}, [
101
  {
102
+ label: {"en": "Scope", "fr": "Cadre"},
103
+ description: {
104
+ "en": "More than 700 question from the baccalauréat exam (end of high school national French exam), including the followong subjects : maths, computer science and physics-chemestry. The extracted questions are multiple choice, code completation or exact solution answer (with a tolerence degree).",
105
+ "fr": "Plus de 700 questions du baccalauréat, comprenant les matières suivantes : mathématiques, numérique et sciende de l’informatique et physique-chimie. Les questions extraites sont des questions du type à choix multiple, de la completion du code ou des solutions exactes (avec un degré de tolérance)."
106
+ },
107
  },
108
  {
109
+ label: {"en": "Scoring: Accuracy", "fr": "Notation: Précision"
110
+ },
111
  description:
112
+ {
113
+ "en": "(Prefix quasi-exact match) Was the solution generated correct and in the expected format.",
114
+ "fr": "(Prefix quasi-exact match) La solution générée était-elle correcte et dans le format attendu."
115
+ },
116
  },
117
  ]),
118
 
119
+ // ARCHITECTURE: createTooltipContent("Model Architecture Information:", [
120
+ // {
121
+ // label: "Definition",
122
+ // description: "The fundamental structure and design of the model",
123
+ // subItems: [
124
+ // "Pretrained: Foundational models, initially trained on large datasets without task-specific tuning, serving as a versatile base for further development.",
125
+ // "Continuously Pretrained: Base models trained with a data mix evolving as the model is trained, with the addition of specialized data during the last training steps.",
126
+ // "Fine-tuned: Base models, fine-tuned on specialised domain data (legal, medical, ...), and optimized for particular tasks.",
127
+ // "Chat: Models fine-tuned with IFT, RLHF, DPO, and other techniques, to handle conversational contexts effectively.",
128
+ // "Merged: Combining multiple models through weights averaging or similar methods.",
129
+ // "Multimodal: Models which can handle several modalities (text & image/audio/video/...). We only evaluate the text capabilities.",
130
+ // ],
131
+ // },
132
+ // {
133
+ // label: "Impact",
134
+ // description: "How architecture affects model capabilities",
135
+ // subItems: [
136
+ // "Base models are expected to perform less well on instruction following evaluations, like IFEval.",
137
+ // "Fine-tuned and chat models can be more verbose and more chatty than base models.",
138
+ // "Merged models tend to exhibit good performance on benchmarks, which do not translate to real-world situations.",
139
+ // ],
140
+ // },
141
+ // ]),
142
 
143
+ // PRECISION: createTooltipContent("Numerical Precision Format:", [
144
+ // {
145
+ // label: "Overview",
146
+ // description:
147
+ // "Data format used to store model weights and perform computations",
148
+ // subItems: [
149
+ // "bfloat16: Half precision (Brain Float format), good for stability",
150
+ // "float16: Half precision",
151
+ // "8bit/4bit: Quantized formats, for efficiency",
152
+ // "GPTQ/AWQ: Quantized methods",
153
+ // ],
154
+ // },
155
+ // {
156
+ // label: "Impact",
157
+ // description: "How precision affects model deployment",
158
+ // subItems: [
159
+ // "Higher precision = better accuracy but more memory usage",
160
+ // "Lower precision = faster inference and smaller size",
161
+ // "Trade-off between model quality and resource usage",
162
+ // ],
163
+ // },
164
+ // ]),
165
 
166
+ // FLAGS: createTooltipContent("Model Flags and Special Features:", [
167
+ // {
168
+ // label: "Filters",
169
+ // subItems: [
170
+ // "Mixture of Expert: Uses a MoE architecture",
171
+ // "Merged models: Created by averaging other models",
172
+ // "Contaminated: Flagged by users from the community for (possibly accidental) cheating",
173
+ // "Unavailable: No longer on the hub (private, deleted) or missing a license tag",
174
+ // ],
175
+ // },
176
+ // {
177
+ // label: "Purpose",
178
+ // description: "Why do people want to hide these models?",
179
+ // subItems: [
180
+ // "Mixture of Experts: These models can be too parameter heavy",
181
+ // "Merged models: Performance on benchmarks tend to be inflated compared to real life usage",
182
+ // "Contaminated: Performance on benchmarks is inflated and not reflecting real life usage",
183
+ // ],
184
+ // },
185
+ // ]),
186
 
187
+ // PARAMETERS: createTooltipContent("Model Parameters:", [
188
+ // {
189
+ // label: "Measurement",
190
+ // description: "Total number of trainable parameters in billions",
191
+ // subItems: [
192
+ // "Indicates model capacity and complexity",
193
+ // "Correlates with computational requirements",
194
+ // "Influences memory usage and inference speed",
195
+ // ],
196
+ // },
197
+ // ]),
198
 
199
+ // LICENSE: createTooltipContent("Model License Information:", [
200
+ // {
201
+ // label: "Importance",
202
+ // description: "Legal terms governing model usage and distribution",
203
+ // subItems: [
204
+ // "Commercial vs non-commercial use",
205
+ // "Attribution requirements",
206
+ // "Modification and redistribution rights",
207
+ // "Liability and warranty terms",
208
+ // ],
209
+ // },
210
+ // ]),
211
 
212
+ // CO2_COST: createTooltipContent("Carbon Dioxide Emissions:", [
213
+ // {
214
+ // label: "What is it?",
215
+ // description: "CO₂ emissions of the model evaluation ",
216
+ // subItems: [
217
+ // "Only focuses on model inference for our specific setup",
218
+ // "Considers data center location and energy mix",
219
+ // "Allows equivalent comparision of models on our use case",
220
+ // ],
221
+ // },
222
+ // {
223
+ // label: "Why it matters",
224
+ // description: "Environmental impact of AI model training",
225
+ // subItems: [
226
+ // "Large models can have significant carbon footprints",
227
+ // "Helps make informed choices about model selection",
228
+ // ],
229
+ // },
230
+ // {
231
+ // label: "Learn more",
232
+ // description:
233
+ // "For detailed information about our CO₂ calculation methodology, visit:",
234
+ // subItems: [
235
+ // <a
236
+ // href="https://huggingface.co/docs/leaderboards/open_llm_leaderboard/emissions"
237
+ // target="_blank"
238
+ // rel="noopener noreferrer"
239
+ // style={{ color: "#90caf9" }}
240
+ // >
241
+ // Carbon Emissions Documentation ↗
242
+ // </a>,
243
+ // ],
244
+ // },
245
+ // ]),
246
  };
247
 
248
  export const UI_TOOLTIPS = {
249
+ COLUMN_SELECTOR: {"en": "Choose which columns to display in the table", "fr": "Choix des colonnes à afficher dans le tableau"},
250
+ DISPLAY_OPTIONS: () => createTooltipContent({"en": "Table Display Options", "fr": "Options d'affichage du tableau" }, [
251
  {
252
+ label: {"en": "Overview", "fr": "Aperçu"},
253
+ description: {"en": "Configure how the table displays data and information", "fr": "Configure la manière dont le tableau affiche les données et l'information"},
254
  subItems: [
255
+ {"en": "Row size and layout", "fr": "Disposition et taille des lignes"},
256
+ {"en": "Score display format", "fr": "Format d'affichage des scores"},
257
+ {"en": "Ranking calculation", "fr": "Calcul du rang"},
258
+ {"en": "Average score computation", "fr": "Calcul du score moyen"},
259
  ],
260
  },
261
  ]),
262
+ SEARCH_BAR: () => createTooltipContent({
263
+ "en": "Advanced Model Search",
264
+ "fr": "Recherche avancée de modèles"
265
+ }, [
266
  {
267
+ label: {
268
+ "en": "Name Search",
269
+ "fr": "Recherche par nom"
270
+ },
271
+ description: {
272
+ "en": "Search directly by model name",
273
+ "fr": "Rechercher un modèle par son nom"
274
+ },
275
  subItems: [
276
+ {
277
+ "en": "Supports regular expressions (e.g., ^mistral.*7b)",
278
+ "fr": "Supporte les expressions régulières (par ex. ^mistral.*7b)"
279
+ },
280
+ {
281
+ "en": "Case sensitive",
282
+ "fr": "Sensible à la casse"
283
+ }
284
  ],
285
  },
286
  {
287
+ label: {"en": "Field Search"},
288
+ description: {"en": "Use @field:value syntax for precise filtering"},
289
  subItems: [
290
+ {"en": "@architecture:llama - Filter by architecture",},
291
+ {"en": "@license:mit - Filter by license",},
292
+ {"en": "@precision:float16 - Filter by precision"},
293
+ {"en": "@type:chat - Filter by model type"},
294
  ],
295
  },
296
  {
297
+ label: {"en": "Multiple Searches"},
298
+ description: {"en": "Combine multiple criteria using semicolons"},
299
  subItems: [
300
+ {"en": "meta @license:mit; @architecture:llama"},
301
+ {"en": "^mistral.*7b; @precision:float16"},
302
  ],
303
  },
304
  ]),
305
+ // QUICK_FILTERS: createTooltipContent(
306
+ // "Filter models based on their size and applicable hardware:",
307
+ // [
308
+ // {
309
+ // label: "Edge devices (Up to 3BB)",
310
+ // description:
311
+ // "Efficient models for edge devices, optimized for blazing fast inference.",
312
+ // },
313
+ // {
314
+ // label: "Smol Models (3B-7B)",
315
+ // description:
316
+ // "Efficient models for consumer hardware, optimized for fast inference.",
317
+ // },
318
+ // {
319
+ // label: "Mid-range models (7B-65B)",
320
+ // description:
321
+ // "A bit of everything here, with overall balanced performance and resource usage around 30B.",
322
+ // },
323
+ // {
324
+ // label: "GPU-rich models (65B+)",
325
+ // description:
326
+ // "State-of-the-art performance for complex tasks, requires significant computing power.",
327
+ // },
328
+ // {
329
+ // label: "Official Providers",
330
+ // description:
331
+ // "Models directly maintained by their original creators, ensuring reliability and up-to-date performance.",
332
+ // },
333
+ // ]
334
+ // ),
335
  ROW_SIZE: {
336
+ title: {"en": "Row Size", "fr": "Taille des lignes"},
337
  description:
338
+ {"en": "Adjust the height of table rows. Compact is ideal for viewing more data at once, while Large provides better readability and touch targets.",
339
+ "fr": "Ajustez la taille des lignes du tableau."
340
+ },
341
  },
342
  SCORE_DISPLAY: {
343
+ title: {"en": "Score Display", "fr": "Affichage du score"},
344
  description:
345
+ {"en": "Choose between normalized scores (0-100% scale for easy comparison) or raw scores (actual benchmark results). Normalized scores help compare performance across different benchmarks, while raw scores show actual benchmark outputs.",
346
+ "fr": "Afficher les scores normalisées (échelle 0-100% pour une comparaison facile) ou les scores bruts (les résultats du benchmark tels quels). Les scores normalisées aident à comparer la performance à travers les différents benchmarks alors que les scores bruts montrent les résultats bruts de chaque benchmark."
347
+ },
348
  },
349
  RANKING_MODE: {
350
+ title: {"en": "Ranking Mode", "fr": "Type de classement"},
351
  description:
352
+ {"en": "Choose between static ranking (original position in the full leaderboard) or dynamic ranking (position based on current filters and sorting).",
353
+ "fr": "Choisir entre un classement statique (position originale dans le leaderboard complet) ou bien un classement dynamique (basé sur les filtres actuels)."
354
+ },
355
  },
356
  AVERAGE_SCORE: {
357
+ title: {"en": "Average Score Calculation", "fr": "Calcul du score moyen"},
358
  description:
359
+ {"en": "Define how the average score is calculated. 'All Scores' uses all benchmarks, while 'Visible Only' calculates the average using only the visible benchmark columns.",
360
+ "fr": "Definir comment le score moyen est calculé. 'Tous' utilise tous les benchmarks, alors que 'Visibles' calcule le score moyen en utilisant uniquement les colonnes de benchmarks visibles."
361
+ },
362
  },
363
  };
364
 
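A quick illustration of the localization pattern used above: tooltip strings are now stored as {en, fr} objects, and the entries that need them are defined as zero-argument factories (e.g. DISPLAY_OPTIONS: () => createTooltipContent(...)), so the content is built when the tooltip renders rather than when the module loads. The sketch below shows one way such {en, fr} values could be resolved; the type names and the resolveLocalized helper are illustrative assumptions, not the project's actual i18n API.

// Hypothetical sketch only — LocalizedString and resolveLocalized are
// assumptions for illustration, not the project's actual i18n helpers.
type Locale = "en" | "fr";
type LocalizedString = string | Partial<Record<Locale, string>>;

// Return the string for the active locale, falling back to English,
// then to any value that happens to be present.
function resolveLocalized(value: LocalizedString, locale: Locale): string {
  if (typeof value === "string") return value;
  return value[locale] ?? value.en ?? Object.values(value)[0] ?? "";
}

// Example with one of the UI_TOOLTIPS entries defined above:
const columnSelector: LocalizedString = {
  en: "Choose which columns to display in the table",
  fr: "Choix des colonnes à afficher dans le tableau",
};
console.log(resolveLocalized(columnSelector, "fr"));
// -> "Choix des colonnes à afficher dans le tableau"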
frontend/src/pages/LeaderboardPage/components/Leaderboard/utils/columnUtils.jsx CHANGED
@@ -616,7 +616,7 @@ export const createColumns = (
616
  },
617
  {
618
  accessorKey: "model.average_score",
619
- header: createHeaderCell(headerNames.avg, COLUMN_TOOLTIPS.AVERAGE),
620
  cell: ({ row, getValue }) =>
621
  createScoreCell(getValue, row, "model.average_score"),
622
  size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"],
@@ -727,7 +727,7 @@ export const createColumns = (
727
  const evaluationColumns = [
728
  {
729
  accessorKey: "evaluations.ifeval_fr.normalized_score",
730
- header: createHeaderCell("IFEval FR", COLUMN_TOOLTIPS.IFEVALFR),
731
  cell: ({ row, getValue }) =>
732
  createScoreCell(getValue, row, "evaluations.ifeval_fr.normalized_score"),
733
  size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES[
@@ -736,7 +736,7 @@ export const createColumns = (
736
  },
737
  {
738
  accessorKey: "evaluations.gpqa_fr.normalized_score",
739
- header: createHeaderCell("GPQA FR", COLUMN_TOOLTIPS.GPQAFR),
740
  cell: ({ row, getValue }) =>
741
  createScoreCell(getValue, row, "evaluations.gpqa_fr.normalized_score"),
742
  size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES[
@@ -745,7 +745,7 @@ export const createColumns = (
745
  },
746
  {
747
  accessorKey: "evaluations.bac_fr.normalized_score",
748
- header: createHeaderCell("BAC FR", COLUMN_TOOLTIPS.BACFR),
749
  cell: ({ row, getValue }) =>
750
  createScoreCell(getValue, row, "evaluations.bac_fr.normalized_score"),
751
  size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES[
 
616
  },
617
  {
618
  accessorKey: "model.average_score",
619
+ header: createHeaderCell(headerNames.avg, COLUMN_TOOLTIPS.AVERAGE()),
620
  cell: ({ row, getValue }) =>
621
  createScoreCell(getValue, row, "model.average_score"),
622
  size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"],
 
727
  const evaluationColumns = [
728
  {
729
  accessorKey: "evaluations.ifeval_fr.normalized_score",
730
+ header: createHeaderCell("IFEval FR", COLUMN_TOOLTIPS.IFEVALFR()),
731
  cell: ({ row, getValue }) =>
732
  createScoreCell(getValue, row, "evaluations.ifeval_fr.normalized_score"),
733
  size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES[
 
736
  },
737
  {
738
  accessorKey: "evaluations.gpqa_fr.normalized_score",
739
+ header: createHeaderCell("GPQA FR", COLUMN_TOOLTIPS.GPQAFR()),
740
  cell: ({ row, getValue }) =>
741
  createScoreCell(getValue, row, "evaluations.gpqa_fr.normalized_score"),
742
  size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES[
 
745
  },
746
  {
747
  accessorKey: "evaluations.bac_fr.normalized_score",
748
+ header: createHeaderCell("BAC FR", COLUMN_TOOLTIPS.BACFR()),
749
  cell: ({ row, getValue }) =>
750
  createScoreCell(getValue, row, "evaluations.bac_fr.normalized_score"),
751
  size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES[
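One closing note on the columnUtils.jsx hunks above: the header cells now invoke the tooltip entries as functions (COLUMN_TOOLTIPS.AVERAGE() instead of COLUMN_TOOLTIPS.AVERAGE). Since createTooltipContent is not shown in this diff, the rationale is stated here as an assumption: building the content lazily lets each render pick up the currently selected language instead of freezing whichever locale was active at import time. A minimal sketch of that idea, with names and shapes assumed for illustration:

// Hypothetical sketch — names, shapes, and tooltip text are assumed.
type TooltipContent = { title: string; body: string[] };

// Lazy factory: evaluated where the header is built, so it can read
// the current locale at render time rather than at module load.
const COLUMN_TOOLTIPS_SKETCH = {
  AVERAGE: (): TooltipContent => ({
    title: "Average score",
    body: ["Mean of the benchmark scores shown in the table"],
  }),
};

// Usage mirroring the diff: the factory is called when the header cell is created.
const tooltip = COLUMN_TOOLTIPS_SKETCH.AVERAGE();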