vamseelatha2002 committed (verified)
Commit 06e5962 · Parent(s): 9f08148

Update evaluation.py

Files changed (1): evaluation.py (+5, -5)
evaluation.py CHANGED
@@ -109,8 +109,8 @@ def retrieve_ground_truths(question, dataset):
                 "completeness": instance['completeness_score'],
                 "adherence": instance['adherence_score']
             }
-            return instance['response']  # Return the ground truth response immediately
-    return None  # Return None if no match is found
+            return instance_response, ground_truth_metrics  # Return the ground truth response immediately
+    return None,None  # Return None if no match is found
 
 
 # Store RMSE for each metric in the global rmse_scores dictionary
@@ -129,7 +129,7 @@ def store_rmse(question, predicted_metrics, ground_truth_metrics):
 
 def calculate_metrics(question, q_dataset, response, docs, time_taken):
     data = load_query_dataset(q_dataset)
-    ground_truth_answer = retrieve_ground_truths(question, data)  # Store the ground truth answer
+    ground_truth_answer, ground_truth_metrics = retrieve_ground_truths(question, data)  # Store the ground truth answer
 
     # Ensure ground_truth_answer is not empty before proceeding
     if ground_truth_answer is None:
@@ -154,7 +154,7 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
 
     store_rmse(question, predicted_metrics, ground_truth_metrics)
     # Now, make sure the values passed to RMSE calculation are numeric
-    predicted_completeness = predicted_metrics['completeness']
+    #predicted_completeness = predicted_metrics['completeness']
 
     # Ensure both predicted_completeness and ground_truth_completeness are numeric before calculating RMSE
     '''
@@ -165,7 +165,7 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
     predicted_metrics["rmse"] = "Invalid RMSE calculation"
     '''
     if isinstance(predicted_completeness, (int, float)) and isinstance(ground_truth_completeness, (int, float)):
-        rmse_value = compute_rmse([predicted_completeness], [ground_truth_completeness])
+        rmse_value = compute_rmse([predicted_metrics], [ground_truth_metrics])
         predicted_metrics["rmse"] = rmse_value  # Adding RMSE to metrics
     else:
         predicted_metrics["rmse"] = "Invalid RMSE calculation"
 