Update evaluation.py
evaluation.py (CHANGED, +5 -5)
@@ -109,8 +109,8 @@ def retrieve_ground_truths(question, dataset):
             "completeness": instance['completeness_score'],
             "adherence": instance['adherence_score']
         }
-        return
-        return None  # Return None if no match is found
+        return instance_response, ground_truth_metrics  # Return the ground truth response immediately
+    return None, None  # Return None if no match is found


 # Store RMSE for each metric in the global rmse_scores dictionary
@@ -129,7 +129,7 @@ def store_rmse(question, predicted_metrics, ground_truth_metrics):

 def calculate_metrics(question, q_dataset, response, docs, time_taken):
     data = load_query_dataset(q_dataset)
-    ground_truth_answer = retrieve_ground_truths(question, data)  # Store the ground truth answer
+    ground_truth_answer, ground_truth_metrics = retrieve_ground_truths(question, data)  # Store the ground truth answer

     # Ensure ground_truth_answer is not empty before proceeding
     if ground_truth_answer is None:
@@ -154,7 +154,7 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):

     store_rmse(question, predicted_metrics, ground_truth_metrics)
     # Now, make sure the values passed to RMSE calculation are numeric
-    predicted_completeness = predicted_metrics['completeness']
+    #predicted_completeness = predicted_metrics['completeness']

     # Ensure both predicted_completeness and ground_truth_completeness are numeric before calculating RMSE
     '''
@@ -165,7 +165,7 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
         predicted_metrics["rmse"] = "Invalid RMSE calculation"
     '''
     if isinstance(predicted_completeness, (int, float)) and isinstance(ground_truth_completeness, (int, float)):
-        rmse_value = compute_rmse([
+        rmse_value = compute_rmse([predicted_metrics], [ground_truth_metrics])
         predicted_metrics["rmse"] = rmse_value  # Adding RMSE to metrics
     else:
         predicted_metrics["rmse"] = "Invalid RMSE calculation"