import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from data_processing import load_query_dataset
# Module-level defaults (used only by the legacy, commented-out implementation below)
ground_truth_answer = ''
ground_truth_metrics = {}
# def calculate_metrics(question, response, docs, time_taken):
# data = load_ragbench()
# retrieve_ground_truths(question, data)
# # Predicted metrics
# predicted_metrics = {
# "ground_truth": ground_truth_answer,
# "context_relevance": context_relevance(question, docs),
# "context_utilization": context_utilization(response, docs),
# "completeness": completeness(response, ground_truth_answer),
# "adherence": adherence(response, docs),
# "response_time" : time_taken
# }
# return predicted_metrics
# def retrieve_ground_truths(question,ragbench_set):
# for dataset_name in ragbench_set.keys():
# for split_name,instances in ragbench_set[dataset_name].items(): # Fixed: Removed extra '.' and corrected indentation
# print(f"Processing {split_name} split")
# for instance in instances: # Fixed: Corrected indentation
# # Check if the question (data) matches the query
# if instance['question'] == question:
# # If a match is found, retrieve id and response
# instance_id = instance['id']
# instance_response = instance['response']
# ground_truth_metrics = {
# "context_relevance": instance['relevance_score'],
# "context_utilization": instance['utilization_score'],
# "completeness": instance['completeness_score'],
# "adherence": instance['adherence_score']
# }
# ground_truth_answer = instance_response
# print(f"Match found in {split_name} split!")
# print(f"ID: {instance_id}, Response: {instance_response}")
# break # Exit after finding the first match (optional)
# Step 1: Helper function to compute cosine similarity
def compute_cosine_similarity(text1, text2):
if not text1 or not text2: # Check for empty or None values
print("Error: One or both input texts are empty. Returning similarity as 0.")
return 0.0
vectorizer = TfidfVectorizer(stop_words="english")
try:
vectors = vectorizer.fit_transform([text1, text2])
similarity = cosine_similarity(vectors[0], vectors[1])[0][0]
return similarity
except ValueError as e:
print(f"Error in vectorization: {e}. Returning similarity as 0.")
return 0.0
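# Illustrative usage (hypothetical strings, for clarity only):
#   compute_cosine_similarity("capital of France", "Paris is the capital of France")
# returns a TF-IDF cosine similarity in [0, 1]; identical texts score 1.0 and texts
# sharing no non-stopword terms score 0.0.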
# Step 2: Metric 1 - Context Relevance
def context_relevance(question, relevant_documents):
    # Documents are expected as plain strings; for LangChain Documents, join doc.page_content instead.
    combined_docs = " ".join(relevant_documents)
return compute_cosine_similarity(question, combined_docs)
# Step 3: Metric 2 - Context Utilization
def context_utilization(response, relevant_documents):
    # Documents are expected as plain strings; for LangChain Documents, join doc.page_content instead.
    combined_docs = " ".join(relevant_documents)
return compute_cosine_similarity(response, combined_docs)
# Step 4: Metric 3 - Completeness
def completeness(response, ground_truth_answer):
return compute_cosine_similarity(response, ground_truth_answer)
# Step 5: Metric 4 - Adherence
def adherence(response, relevant_documents):
    # Documents are expected as plain strings; for LangChain Documents, join doc.page_content instead.
    combined_docs = " ".join(relevant_documents)
    response_tokens = set(response.split())
    relevant_tokens = set(combined_docs.split())
    supported_tokens = response_tokens.intersection(relevant_tokens)
    # Guard against an empty response to avoid division by zero.
    if not response_tokens:
        return 0.0
    return len(supported_tokens) / len(response_tokens)
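# Illustrative example (hypothetical values): if the response is "the dog runs fast"
# and the documents contain the tokens {"the", "dog", "fast"}, then 3 of the 4
# response tokens are supported and adherence = 3 / 4 = 0.75.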
# Step 6: Compute RMSE for metrics
def compute_rmse(predicted_values, ground_truth_values):
return np.sqrt(mean_squared_error(ground_truth_values, predicted_values))
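# Illustrative example (hypothetical scores): compute_rmse([0.8, 0.6], [1.0, 0.5])
# = sqrt(((1.0 - 0.8)**2 + (0.5 - 0.6)**2) / 2) = sqrt(0.025) ≈ 0.158.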
def retrieve_ground_truths(question, dataset):
    """Return the ground-truth response and annotated scores for a question, or (None, None) if no match is found."""
    for split_name, instances in dataset.items():
        for instance in instances:
            if instance['question'] == question:
                ground_truth_scores = {
                    "context_relevance": instance.get('relevance_score'),
                    "context_utilization": instance.get('utilization_score'),
                    "completeness": instance.get('completeness_score'),
                    "adherence": instance.get('adherence_score')
                }
                return instance['response'], ground_truth_scores  # Return on the first match
    return None, None  # No match found
def calculate_metrics(question, q_dataset, response, docs, time_taken):
    data = load_query_dataset(q_dataset)
    ground_truth_answer, ground_truth_scores = retrieve_ground_truths(question, data)
    # Default to an empty string if no ground truth is found
    if ground_truth_answer is None:
        ground_truth_answer = ""
    # Predicted metrics
    predicted_metrics = {
        "RAG_model_response": response,
        "ground_truth": ground_truth_answer,
        "context_relevance": context_relevance(question, docs),
        "context_utilization": context_utilization(response, docs),
        "completeness": completeness(response, ground_truth_answer),
        "adherence": adherence(response, docs),
        "response_time": time_taken
    }
    # If the dataset provides annotated scores, compute RMSE between the predicted
    # metric values and the ground-truth metric values.
    if ground_truth_scores:
        metric_names = ["context_relevance", "context_utilization", "completeness", "adherence"]
        ground_truth_values = [ground_truth_scores[m] for m in metric_names]
        if all(v is not None for v in ground_truth_values):
            predicted_values = [predicted_metrics[m] for m in metric_names]
            predicted_metrics['rmse'] = compute_rmse(predicted_values, ground_truth_values)
    return predicted_metrics
''' def retrieve_ground_truths(question, dataset):
for split_name, instances in dataset.items():
print(f"Processing {split_name} split")
for instance in instances:
if instance['question'] == question:
instance_id = instance['id']
instance_response = instance['response']
# ground_truth_metrics = {
# "context_relevance": instance['relevance_score'],
# "context_utilization": instance['utilization_score'],
# "completeness": instance['completeness_score'],
# "adherence": instance['adherence_score']
# }
print(f"Match found in {split_name} split!")
print(f"ID: {instance_id}, Response: {instance_response}")
return instance_response # Return ground truth response immediately
return None # Return None if no match is found
'''
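# Minimal usage sketch. Assumptions: load_query_dataset accepts a dataset name such
# as "hotpotqa", and the question, documents, response, and timing below are toy
# values shown for illustration only.
if __name__ == "__main__":
    sample_question = "What is the capital of France?"
    sample_docs = [
        "Paris is the capital and most populous city of France.",
        "France is a country in Western Europe."
    ]
    sample_response = "The capital of France is Paris."
    metrics = calculate_metrics(
        question=sample_question,
        q_dataset="hotpotqa",   # hypothetical dataset name; replace with a real RAGBench subset
        response=sample_response,
        docs=sample_docs,
        time_taken=0.42         # seconds, measured by the caller
    )
    print(metrics)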