cb1716pics committed
Commit 454c8e8 (verified) · 1 Parent(s): 2d788f8

Upload 2 files

Files changed (1)
  evaluation.py (+4 -2)
evaluation.py CHANGED
@@ -89,7 +89,9 @@ def adherence(response, relevant_documents):
 
 # Step 6: Compute RMSE for metrics
 def compute_rmse(predicted_values, ground_truth_values):
-    return np.sqrt(mean_squared_error(ground_truth_values, predicted_values))
+    predicted_ = [float(v) for v in predicted_values.values()]
+    ground_truth_ = [float(v) if isinstance(v, (int, float)) else 0.75 if v is True else 0.25 for v in ground_truth_values.values()]
+    return np.sqrt(mean_squared_error(ground_truth_, predicted_))
 
 def calculate_metrics(question, q_dataset, response, docs, time_taken):
     data = load_query_dataset(q_dataset)
@@ -131,7 +133,7 @@ def retrieve_ground_truths(question, dataset):
         "context_relevance": instance['relevance_score'],
         "context_utilization": instance['utilization_score'],
         "completeness": instance['completeness_score'],
-        "adherence": 0.75 if instance['adherence_score'] is True else 0.25
+        "adherence": instance['adherence_score']
     }
     print(f"Match found in {split_name} split!")
     print(f"ID: {instance_id}, Response: {ground_truth}")