Upload 3 files
- data_processing.py +4 -4
- evaluation.py +71 -34
data_processing.py
CHANGED
@@ -76,11 +76,11 @@ def create_faiss_index(dataset):
 
 def load_ragbench():
     global ragbench
-    if ragbench
+    if ragbench:
         return ragbench
-
-'
-
+    datasets = ['covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa', 'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa',
+                'tatqa', 'techqa']
+    for dataset in datasets:
         ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
     return ragbench
 
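Usage note: a minimal sketch of how the revised loader can be called, assuming the module is importable as data_processing, that ragbench is a module-level dict defined earlier in the file, and that load_dataset comes from the Hugging Face datasets library (the 'covidqa' subset below is just an example):

    from data_processing import load_ragbench  # hypothetical import path for this Space

    data = load_ragbench()             # downloads and caches all twelve ragbench subsets
    covid = data['covidqa']            # each entry is a DatasetDict keyed by split
    for split_name, instances in covid.items():
        print(split_name, len(instances))  # number of QA instances per split

Because ragbench acts as a module-level cache, repeated calls return the already-loaded datasets instead of downloading them again.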
evaluation.py
CHANGED
@@ -11,40 +11,40 @@ global ground_truth_answer, ground_truth_metrics
 ground_truth_answer = ''
 ground_truth_metrics = {}
 
-def calculate_metrics(question, response, docs, time_taken):
-    ...
-def retrieve_ground_truths(question,ragbench_set):
-    ...
+# def calculate_metrics(question, response, docs, time_taken):
+#     data = load_ragbench()
+#     retrieve_ground_truths(question, data)
+#     # Predicted metrics
+#     predicted_metrics = {
+#         "ground_truth": ground_truth_answer,
+#         "context_relevance": context_relevance(question, docs),
+#         "context_utilization": context_utilization(response, docs),
+#         "completeness": completeness(response, ground_truth_answer),
+#         "adherence": adherence(response, docs),
+#         "response_time" : time_taken
+#     }
+#     return predicted_metrics
+
+# def retrieve_ground_truths(question,ragbench_set):
+#     for dataset_name in ragbench_set.keys():
+#         for split_name,instances in ragbench_set[dataset_name].items():  # Fixed: Removed extra '.' and corrected indentation
+#             print(f"Processing {split_name} split")
+#             for instance in instances:  # Fixed: Corrected indentation
+#                 # Check if the question (data) matches the query
+#                 if instance['question'] == question:
+#                     # If a match is found, retrieve id and response
+#                     instance_id = instance['id']
+#                     instance_response = instance['response']
+#                     ground_truth_metrics = {
+#                         "context_relevance": instance['relevance_score'],
+#                         "context_utilization": instance['utilization_score'],
+#                         "completeness": instance['completeness_score'],
+#                         "adherence": instance['adherence_score']
+#                     }
+#                     ground_truth_answer = instance_response
+#                     print(f"Match found in {split_name} split!")
+#                     print(f"ID: {instance_id}, Response: {instance_response}")
+#                     break  # Exit after finding the first match (optional)
 
 # Step 1: Helper function to compute cosine similarity
 def compute_cosine_similarity(text1, text2):
@@ -91,4 +91,41 @@ def adherence(response, relevant_documents):
 def compute_rmse(predicted_values, ground_truth_values):
     return np.sqrt(mean_squared_error(ground_truth_values, predicted_values))
 
+def calculate_metrics(question, response, docs, time_taken):
+    data = load_ragbench()
+    ground_truth_answer = retrieve_ground_truths(question, data)  # Store the ground truth answer
+
+    # Ensure ground_truth_answer is not empty before proceeding
+    if ground_truth_answer is None:
+        ground_truth_answer = ""  # Default to an empty string if no ground truth is found
+
+    # Predicted metrics
+    predicted_metrics = {
+        "ground_truth": ground_truth_answer,
+        "context_relevance": context_relevance(question, docs),
+        "context_utilization": context_utilization(response, docs),
+        "completeness": completeness(response, ground_truth_answer),
+        "adherence": adherence(response, docs),
+        "response_time": time_taken
+    }
+    return predicted_metrics
+
+def retrieve_ground_truths(question, ragbench_set):
+    for dataset_name in ragbench_set.keys():
+        for split_name, instances in ragbench_set[dataset_name].items():
+            print(f"Processing {split_name} split")
+            for instance in instances:
+                if instance['question'] == question:
+                    instance_id = instance['id']
+                    instance_response = instance['response']
+                    # ground_truth_metrics = {
+                    #     "context_relevance": instance['relevance_score'],
+                    #     "context_utilization": instance['utilization_score'],
+                    #     "completeness": instance['completeness_score'],
+                    #     "adherence": instance['adherence_score']
+                    # }
+                    print(f"Match found in {split_name} split!")
+                    print(f"ID: {instance_id}, Response: {instance_response}")
+                    return instance_response  # Return ground truth response immediately
 
+    return None  # Return None if no match is found