Spaces:
Runtime error
Runtime error
update output format.
Browse files- lingo_judge_metric.py +13 -2
lingo_judge_metric.py
CHANGED
|
@@ -30,7 +30,10 @@ Args:
|
|
| 30 |
`references` (list of list of str): Multiple references per question.
|
| 31 |
|
| 32 |
Returns:
|
| 33 |
-
`
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
Examples:
|
| 36 |
>>> metric = evaluate.load("maysonma/lingo_judge_metric")
|
|
@@ -74,4 +77,12 @@ class LingoJudgeMetric(evaluate.Metric):
|
|
| 74 |
def _compute(self, questions, predictions, references):
|
| 75 |
"""Returns the scores"""
|
| 76 |
scores = self.scorer.compute(questions, references, predictions)
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
`references` (list of list of str): Multiple references per question.
|
| 31 |
|
| 32 |
Returns:
|
| 33 |
+
`score` (list of float): Lingo-Judge score.
|
| 34 |
+
`probability` (list of float): Probability of the prediction being correct.
|
| 35 |
+
`correct` (list of bool): Whether the prediction is correct.
|
| 36 |
+
`benchmark_score` (float): Benchmark score.
|
| 37 |
|
| 38 |
Examples:
|
| 39 |
>>> metric = evaluate.load("maysonma/lingo_judge_metric")
|
|
|
|
| 77 |
def _compute(self, questions, predictions, references):
|
| 78 |
"""Returns the scores"""
|
| 79 |
scores = self.scorer.compute(questions, references, predictions)
|
| 80 |
+
probability = torch.sigmoid(scores)
|
| 81 |
+
correct = scores > 0.0
|
| 82 |
+
benchmark_score = float(torch.sum(correct).item() / len(correct))
|
| 83 |
+
return {
|
| 84 |
+
"score": scores.cpu().tolist(),
|
| 85 |
+
"probability": probability.cpu().tolist(),
|
| 86 |
+
"correct": correct.cpu().tolist(),
|
| 87 |
+
"benchmark_score": benchmark_score,
|
| 88 |
+
}
|