Update evaluation.py

evaluation.py CHANGED (+16 -7)
@@ -109,22 +109,31 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
     # Ensure ground_truth_answer is not empty before proceeding
     if ground_truth_answer is None:
         ground_truth_answer = ""  # Default to an empty string if no ground truth is found
-
+
+    # Convert ground truth to numeric form (e.g., using cosine similarity or some metric)
+    # Here, let's assume completeness is based on cosine similarity between the response and the ground truth
+    ground_truth_completeness = compute_cosine_similarity(response, ground_truth_answer)
+
     # Predicted metrics
     predicted_metrics = {
         "RAG_model_response": response,
         "ground_truth": ground_truth_answer,
         "context_relevance": context_relevance(question, docs),
         "context_utilization": context_utilization(response, docs),
-        "completeness": completeness(response, ground_truth_answer),
+        "completeness": compute_cosine_similarity(response, ground_truth_answer), #completeness(response, ground_truth_answer),
         "adherence": adherence(response, docs),
         "response_time": time_taken
     }
-    #
-
-
-
-
+    # Now, make sure the values passed to RMSE calculation are numeric
+    predicted_completeness = predicted_metrics['completeness']
+
+    # Ensure both predicted_completeness and ground_truth_completeness are numeric before calculating RMSE
+    if isinstance(predicted_completeness, (int, float)) and isinstance(ground_truth_completeness, (int, float)):
+        rmse_value = compute_rmse([predicted_completeness], [ground_truth_completeness])
+        predicted_metrics["rmse"] = rmse_value  # Adding RMSE to metrics
+    else:
+        predicted_metrics["rmse"] = "Invalid RMSE calculation"
+
     return predicted_metrics
 
 ''' def retrieve_ground_truths(question, dataset):
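
The new hunk calls two helpers, compute_cosine_similarity and compute_rmse, that are defined elsewhere in evaluation.py and not shown in this diff. Below is a minimal sketch of what such helpers could look like, assuming a TF-IDF based cosine similarity via scikit-learn and the standard root-mean-square-error formula; the names match the calls above, but the bodies are illustrative assumptions, not the file's actual implementation.

# Hypothetical helper implementations -- not part of this commit; the real
# versions in evaluation.py may differ. Assumes NumPy and scikit-learn.
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


def compute_cosine_similarity(text_a, text_b):
    # TF-IDF vectorize both texts and return their cosine similarity in [0, 1].
    if not text_a or not text_b:
        return 0.0
    tfidf = TfidfVectorizer().fit_transform([text_a, text_b])
    return float(cosine_similarity(tfidf[0], tfidf[1])[0][0])


def compute_rmse(predicted_values, ground_truth_values):
    # Root mean squared error between two equal-length numeric sequences.
    predicted = np.asarray(predicted_values, dtype=float)
    actual = np.asarray(ground_truth_values, dtype=float)
    return float(np.sqrt(np.mean((predicted - actual) ** 2)))

With single-element lists, as in the hunk above, this RMSE reduces to the absolute difference between the predicted and ground-truth completeness scores.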