Upload 2 files
evaluation.py CHANGED (+4 -2)
@@ -89,7 +89,9 @@ def adherence(response, relevant_documents):
 
 # Step 6: Compute RMSE for metrics
 def compute_rmse(predicted_values, ground_truth_values):
-
+    predicted_ = [float(v) for v in predicted_values.values()]
+    ground_truth_ = [float(v) if isinstance(v, (int, float)) else 0.75 if v is True else 0.25 for v in ground_truth_values.values()]
+    return np.sqrt(mean_squared_error(ground_truth_, predicted_))
 
 def calculate_metrics(question, q_dataset, response, docs, time_taken):
     data = load_query_dataset(q_dataset)
@@ -131,7 +133,7 @@ def retrieve_ground_truths(question, dataset):
            "context_relevance": instance['relevance_score'],
            "context_utilization": instance['utilization_score'],
            "completeness": instance['completeness_score'],
-           "adherence":
+           "adherence": instance['adherence_score']
        }
        print(f"Match found in {split_name} split!")
        print(f"ID: {instance_id}, Response: {ground_truth}")
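For context, here is the new compute_rmse restated as a self-contained, runnable sketch. The imports are assumptions: the diff references np and mean_squared_error without showing them, so numpy and scikit-learn imports elsewhere in evaluation.py are presumed, and the example values below are made up. One subtlety: because bool is a subclass of int in Python, a True/False ground-truth value satisfies isinstance(v, (int, float)) and is converted by float(v) to 1.0/0.0, so the 0.75/0.25 fallbacks only fire for non-numeric values such as None.

# Self-contained sketch of the new compute_rmse; the imports are assumed,
# since the diff does not show them.
import numpy as np
from sklearn.metrics import mean_squared_error

def compute_rmse(predicted_values, ground_truth_values):
    # Predicted metric values are expected to already be numeric.
    predicted_ = [float(v) for v in predicted_values.values()]
    # Numeric ground truths pass through float(); the 0.75/0.25 fallbacks
    # target boolean labels, but note that bool subclasses int, so
    # True/False actually take the float(v) branch and become 1.0/0.0.
    ground_truth_ = [
        float(v) if isinstance(v, (int, float)) else 0.75 if v is True else 0.25
        for v in ground_truth_values.values()
    ]
    # RMSE = sqrt(MSE); ground truth comes first, matching sklearn's
    # (y_true, y_pred) argument order.
    return np.sqrt(mean_squared_error(ground_truth_, predicted_))

# Hypothetical usage; the keys match the ground-truth dict built in
# retrieve_ground_truths. Both dicts must list the metrics in the same
# order, since only .values() are compared.
predicted = {"context_relevance": 0.8, "context_utilization": 0.7,
             "completeness": 0.9, "adherence": 0.6}
ground_truth = {"context_relevance": 0.75, "context_utilization": 0.65,
                "completeness": 1.0, "adherence": True}  # True -> 1.0 here
print(compute_rmse(predicted, ground_truth))

Taken together, the two hunks make the metrics path executable end to end: compute_rmse previously had no body, and the ground-truth dict in retrieve_ground_truths ended in a bare "adherence": key (a syntax error), which now reads instance['adherence_score'] like the three score fields above it.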