Update evaluation.py
evaluation.py  +13 -10
evaluation.py
CHANGED
@@ -156,16 +156,7 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
     ground_truth_completeness = compute_cosine_similarity(response, ground_truth_answer)

     # Predicted metrics
-    predicted_metrics = {
-        "RAG_model_response": response,
-        "ground_truth": ground_truth_answer,
-        "context_relevance": context_relevance(question, docs),
-        "context_utilization": context_utilization(response, docs),
-        "completeness": compute_cosine_similarity(response, ground_truth_answer), #completeness(response, ground_truth_answer),
-        "adherence": adherence(response, docs),
-        "response_time": time_taken,
-        "ground truth completeness": ground_truth_completeness
-    }
+

     # Predicted metrics
     predicted_metrics_rmse = {
@@ -218,7 +209,19 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
         predicted_metrics_rmse["rmse"] = rmse_value  # Adding RMSE to metrics
     else:
         predicted_metrics_rmse["rmse"] = "Invalid RMSE calculation"
+
     '''
+    predicted_metrics = {
+        "RAG_model_response": response,
+        "ground_truth": ground_truth_answer,
+        "context_relevance": context_relevance(question, docs),
+        "context_utilization": context_utilization(response, docs),
+        "completeness": compute_cosine_similarity(response, ground_truth_answer), #completeness(response, ground_truth_answer),
+        "adherence": adherence(response, docs),
+        "response_time": time_taken,
+        "ground truth completeness": ground_truth_completeness,
+        "rmse": overall_rmse
+    }
     return predicted_metrics

 ''' def retrieve_ground_truths(question, dataset):
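The hunks above call a compute_cosine_similarity helper for both the "completeness" and "ground truth completeness" fields, but its implementation is outside this diff. A minimal sketch of what such a helper could look like, assuming a TF-IDF representation via scikit-learn (the Space may well use sentence embeddings instead):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def compute_cosine_similarity(response: str, ground_truth_answer: str) -> float:
    """Hypothetical sketch, not the Space's actual helper: cosine similarity
    between two texts in TF-IDF space, returned as a float in [0, 1]."""
    if not response or not ground_truth_answer:
        return 0.0
    tfidf = TfidfVectorizer().fit_transform([response, ground_truth_answer])
    # cosine_similarity on the two rows yields a 1x1 matrix; take its entry
    return float(cosine_similarity(tfidf[0], tfidf[1])[0, 0])

Under this assumption, a response that restates the ground-truth answer scores close to 1, while an unrelated response scores near 0.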
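The second hunk reads rmse_value and overall_rmse but does not show how they are computed. A hedged sketch of one plausible aggregation, assuming the predicted metric scores are compared against reference scores for the same question (the target_scores dict and the helper name are hypothetical, not part of the commit):

import math

def overall_rmse_between(predicted: dict, target_scores: dict) -> float:
    """Root-mean-square error over the numeric metrics both dicts share."""
    shared = [k for k in predicted
              if k in target_scores and isinstance(predicted[k], (int, float))]
    if not shared:
        return float("nan")
    squared_errors = [(predicted[k] - target_scores[k]) ** 2 for k in shared]
    return math.sqrt(sum(squared_errors) / len(squared_errors))

Per the diff, a finite value is stored in predicted_metrics_rmse["rmse"] and, after this commit, also as the "rmse" entry of the returned predicted_metrics dict; a failed calculation falls back to the "Invalid RMSE calculation" string.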