Update evaluation.py
evaluation.py (CHANGED, +5 -5)
@@ -109,8 +109,8 @@ def retrieve_ground_truths(question, dataset):
             "completeness": instance['completeness_score'],
             "adherence": instance['adherence_score']
         }
-        return
-        return None  # Return None if no match is found
+        return instance_response, ground_truth_metrics  # Return the ground truth response immediately
+    return None, None  # Return None if no match is found


 # Store RMSE for each metric in the global rmse_scores dictionary
@@ -129,7 +129,7 @@ def store_rmse(question, predicted_metrics, ground_truth_metrics):

 def calculate_metrics(question, q_dataset, response, docs, time_taken):
     data = load_query_dataset(q_dataset)
-    ground_truth_answer = retrieve_ground_truths(question, data)  # Store the ground truth answer
+    ground_truth_answer, ground_truth_metrics = retrieve_ground_truths(question, data)  # Store the ground truth answer

     # Ensure ground_truth_answer is not empty before proceeding
     if ground_truth_answer is None:
@@ -154,7 +154,7 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):

     store_rmse(question, predicted_metrics, ground_truth_metrics)
     # Now, make sure the values passed to RMSE calculation are numeric
-    predicted_completeness = predicted_metrics['completeness']
+    #predicted_completeness = predicted_metrics['completeness']

     # Ensure both predicted_completeness and ground_truth_completeness are numeric before calculating RMSE
     '''
@@ -165,7 +165,7 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
         predicted_metrics["rmse"] = "Invalid RMSE calculation"
     '''
     if isinstance(predicted_completeness, (int, float)) and isinstance(ground_truth_completeness, (int, float)):
-        rmse_value = compute_rmse([
+        rmse_value = compute_rmse([predicted_metrics], [ground_truth_metrics])
         predicted_metrics["rmse"] = rmse_value  # Adding RMSE to metrics
     else:
         predicted_metrics["rmse"] = "Invalid RMSE calculation"