vamseelatha2002 committed (verified)
Commit cc5bac0 · 1 Parent(s): d56c634

Update evaluation.py

Files changed (1):
  1. evaluation.py +16 -7
evaluation.py CHANGED

@@ -109,22 +109,31 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
     # Ensure ground_truth_answer is not empty before proceeding
     if ground_truth_answer is None:
         ground_truth_answer = ""  # Default to an empty string if no ground truth is found
-
+
+    # Convert ground truth to numeric form (e.g., using cosine similarity or some metric)
+    # Here, let's assume completeness is based on cosine similarity between the response and the ground truth
+    ground_truth_completeness = compute_cosine_similarity(response, ground_truth_answer)
+
     # Predicted metrics
     predicted_metrics = {
         "RAG_model_response": response,
         "ground_truth": ground_truth_answer,
         "context_relevance": context_relevance(question, docs),
         "context_utilization": context_utilization(response, docs),
-        "completeness": completeness(response, ground_truth_answer),
+        "completeness": compute_cosine_similarity(response, ground_truth_answer),  # completeness(response, ground_truth_answer),
         "adherence": adherence(response, docs),
         "response_time": time_taken
     }
-    # If ground_truth_answer and predicted_metrics are available, compute RMSE
-    if ground_truth_answer and predicted_metrics:
-        # Assuming that we are calculating RMSE for completeness or other relevant metrics
-        rmse_value = compute_rmse([predicted_metrics['completeness']], [ground_truth_answer])
-        predicted_metrics['rmse'] = rmse_value
+    # Now, make sure the values passed to RMSE calculation are numeric
+    predicted_completeness = predicted_metrics['completeness']
+
+    # Ensure both predicted_completeness and ground_truth_completeness are numeric before calculating RMSE
+    if isinstance(predicted_completeness, (int, float)) and isinstance(ground_truth_completeness, (int, float)):
+        rmse_value = compute_rmse([predicted_completeness], [ground_truth_completeness])
+        predicted_metrics["rmse"] = rmse_value  # Adding RMSE to metrics
+    else:
+        predicted_metrics["rmse"] = "Invalid RMSE calculation"
+
     return predicted_metrics
 
 ''' def retrieve_ground_truths(question, dataset):
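For context, `compute_cosine_similarity` is called in the new code but not defined in this hunk. A minimal sketch of what such a helper might look like, assuming a TF-IDF bag-of-words representation via scikit-learn (the repo's actual implementation may use embeddings or another vectorizer):

```python
# Hypothetical sketch -- not the repo's actual helper.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def compute_cosine_similarity(response: str, ground_truth: str) -> float:
    """Return the cosine similarity between two texts as a float in [0, 1]."""
    if not response or not ground_truth:
        return 0.0  # empty input: no overlap, and avoids an empty-vocabulary error
    vectors = TfidfVectorizer().fit_transform([response, ground_truth])
    return float(cosine_similarity(vectors[0], vectors[1])[0][0])
```

The explicit `float()` cast matters for the diff's `isinstance(..., (int, float))` guard: a raw NumPy `float32` result, for instance, would fail that check and route the code into the "Invalid RMSE calculation" branch.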
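Likewise, `compute_rmse` is referenced but not shown. A minimal sketch, assuming it takes two equal-length numeric sequences:

```python
# Hypothetical sketch of compute_rmse; the repo's version may differ.
import math
from typing import Sequence

def compute_rmse(predicted: Sequence[float], actual: Sequence[float]) -> float:
    """Root-mean-squared error between two equal-length numeric sequences."""
    if not predicted or len(predicted) != len(actual):
        raise ValueError("inputs must be non-empty and the same length")
    squared_errors = [(p - a) ** 2 for p, a in zip(predicted, actual)]
    return math.sqrt(sum(squared_errors) / len(squared_errors))
```

With the single-element lists passed in the diff, RMSE reduces to `abs(predicted_completeness - ground_truth_completeness)`. That is the point of the change: the old code passed the raw ground-truth string as the target, which would raise a `TypeError` inside the subtraction, whereas the new code first converts the ground truth to a numeric completeness score.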