cb1716pics committed
Commit 58a211a · verified · 1 Parent(s): 3309fe6

Upload 3 files

Files changed (2)
  1. data_processing.py +4 -4
  2. evaluation.py +71 -34
data_processing.py CHANGED
@@ -76,11 +76,11 @@ def create_faiss_index(dataset):
 
 def load_ragbench():
     global ragbench
-    if ragbench is not None:
+    if ragbench:
         return ragbench
-    for dataset in ['covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa',
-                    'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa',
-                    'tatqa', 'techqa']:
+    datasets = ['covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa', 'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa',
+                'tatqa', 'techqa']
+    for dataset in datasets:
         ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
     return ragbench
 
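For reference, a minimal usage sketch of the patched loader. It assumes data_processing.py initialises a module-level cache `ragbench = {}` (which the new truthiness check implies) and that `load_dataset` comes from the Hugging Face `datasets` library; the printouts are illustrative only.

# Sketch only: the import path and the empty-dict cache are assumptions.
from data_processing import load_ragbench

data = load_ragbench()        # first call loads all twelve RAGBench subsets
data_again = load_ragbench()  # later calls return the cached dict without reloading

print(len(data))              # 12 subsets, 'covidqa' through 'techqa'
print(data['covidqa'])        # a DatasetDict with that subset's splits

Assuming the cache starts as an empty dict, the old guard `if ragbench is not None:` returned it before anything had been loaded; the truthiness check only short-circuits once the cache is populated.
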
evaluation.py CHANGED
@@ -11,40 +11,40 @@ global ground_truth_answer, ground_truth_metrics
 ground_truth_answer = ''
 ground_truth_metrics = {}
 
-def calculate_metrics(question, response, docs, time_taken):
-    data = load_ragbench()
-    retrieve_ground_truths(question, data)
-    # Predicted metrics
-    predicted_metrics = {
-        "ground_truth": ground_truth_answer,
-        "context_relevance": context_relevance(question, docs),
-        "context_utilization": context_utilization(response, docs),
-        "completeness": completeness(response, ground_truth_answer),
-        "adherence": adherence(response, docs),
-        "response_time" : time_taken
-    }
-    return predicted_metrics
-
-def retrieve_ground_truths(question,ragbench_set):
-    for dataset_name in ragbench_set.keys():
-        for split_name,instances in ragbench_set[dataset_name].items(): # Fixed: Removed extra '.' and corrected indentation
-            print(f"Processing {split_name} split")
-            for instance in instances: # Fixed: Corrected indentation
-                # Check if the question (data) matches the query
-                if instance['question'] == question:
-                    # If a match is found, retrieve id and response
-                    instance_id = instance['id']
-                    instance_response = instance['response']
-                    ground_truth_metrics = {
-                        "context_relevance": instance['relevance_score'],
-                        "context_utilization": instance['utilization_score'],
-                        "completeness": instance['completeness_score'],
-                        "adherence": instance['adherence_score']
-                    }
-                    ground_truth_answer = instance_response
-                    print(f"Match found in {split_name} split!")
-                    print(f"ID: {instance_id}, Response: {instance_response}")
-                    break # Exit after finding the first match (optional)
+# def calculate_metrics(question, response, docs, time_taken):
+#     data = load_ragbench()
+#     retrieve_ground_truths(question, data)
+#     # Predicted metrics
+#     predicted_metrics = {
+#         "ground_truth": ground_truth_answer,
+#         "context_relevance": context_relevance(question, docs),
+#         "context_utilization": context_utilization(response, docs),
+#         "completeness": completeness(response, ground_truth_answer),
+#         "adherence": adherence(response, docs),
+#         "response_time" : time_taken
+#     }
+#     return predicted_metrics
+
+# def retrieve_ground_truths(question,ragbench_set):
+#     for dataset_name in ragbench_set.keys():
+#         for split_name,instances in ragbench_set[dataset_name].items(): # Fixed: Removed extra '.' and corrected indentation
+#             print(f"Processing {split_name} split")
+#             for instance in instances: # Fixed: Corrected indentation
+#                 # Check if the question (data) matches the query
+#                 if instance['question'] == question:
+#                     # If a match is found, retrieve id and response
+#                     instance_id = instance['id']
+#                     instance_response = instance['response']
+#                     ground_truth_metrics = {
+#                         "context_relevance": instance['relevance_score'],
+#                         "context_utilization": instance['utilization_score'],
+#                         "completeness": instance['completeness_score'],
+#                         "adherence": instance['adherence_score']
+#                     }
+#                     ground_truth_answer = instance_response
+#                     print(f"Match found in {split_name} split!")
+#                     print(f"ID: {instance_id}, Response: {instance_response}")
+#                     break # Exit after finding the first match (optional)
 
 # Step 1: Helper function to compute cosine similarity
 def compute_cosine_similarity(text1, text2):
@@ -91,4 +91,41 @@ def adherence(response, relevant_documents):
 def compute_rmse(predicted_values, ground_truth_values):
     return np.sqrt(mean_squared_error(ground_truth_values, predicted_values))
 
+def calculate_metrics(question, response, docs, time_taken):
+    data = load_ragbench()
+    ground_truth_answer = retrieve_ground_truths(question, data)  # Store the ground truth answer
+
+    # Ensure ground_truth_answer is not empty before proceeding
+    if ground_truth_answer is None:
+        ground_truth_answer = ""  # Default to an empty string if no ground truth is found
+
+    # Predicted metrics
+    predicted_metrics = {
+        "ground_truth": ground_truth_answer,
+        "context_relevance": context_relevance(question, docs),
+        "context_utilization": context_utilization(response, docs),
+        "completeness": completeness(response, ground_truth_answer),
+        "adherence": adherence(response, docs),
+        "response_time": time_taken
+    }
+    return predicted_metrics
+
+def retrieve_ground_truths(question, ragbench_set):
+    for dataset_name in ragbench_set.keys():
+        for split_name, instances in ragbench_set[dataset_name].items():
+            print(f"Processing {split_name} split")
+            for instance in instances:
+                if instance['question'] == question:
+                    instance_id = instance['id']
+                    instance_response = instance['response']
+                    # ground_truth_metrics = {
+                    #     "context_relevance": instance['relevance_score'],
+                    #     "context_utilization": instance['utilization_score'],
+                    #     "completeness": instance['completeness_score'],
+                    #     "adherence": instance['adherence_score']
+                    # }
+                    print(f"Match found in {split_name} split!")
+                    print(f"ID: {instance_id}, Response: {instance_response}")
+                    return instance_response  # Return ground truth response immediately
 
+    return None  # Return None if no match is found
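
For reference, a hypothetical end-to-end sketch of how the relocated `calculate_metrics` and the existing `compute_rmse` might be combined. The import path, question, response, documents, and ground-truth scores below are made up for illustration; `context_relevance`, `context_utilization`, `completeness`, and `adherence` are the scoring helpers defined earlier in evaluation.py.

# Sketch only: illustrative inputs, not real RAGBench data.
import time
from evaluation import calculate_metrics, compute_rmse  # assumed module path

question = "What is the incubation period of COVID-19?"
docs = ["Retrieved document text for the question ..."]

start = time.time()
response = "The incubation period is typically 2 to 14 days."  # e.g. a generated answer
time_taken = time.time() - start

metrics = calculate_metrics(question, response, docs, time_taken)
# metrics holds ground_truth, context_relevance, context_utilization,
# completeness, adherence and response_time for this one example

# Compare predicted scores against (hypothetical) dataset ground-truth scores
predicted = [metrics["context_relevance"], metrics["context_utilization"],
             metrics["completeness"], metrics["adherence"]]
ground_truth = [0.9, 0.8, 0.85, 1.0]  # hypothetical annotation scores for illustration
print(compute_rmse(predicted, ground_truth))

Note that `calculate_metrics` calls `load_ragbench()` on every invocation, so the first call triggers loading all twelve subsets unless the cache in data_processing.py is already populated.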