import numpy as np
from sklearn.metrics import mean_squared_error, roc_auc_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from data_processing import load_ragbench

def calculate_metrics(question, response, docs, time_taken):
    # docs is expected to be a list of retrieved documents exposing a
    # .page_content attribute (e.g. LangChain Documents)
    data = load_ragbench()
    ground_truth_answer, ground_truth_metrics = retrieve_ground_truths(question, data)

    # Predicted metrics
    predicted_metrics = {
        "context_relevance": context_relevance(question, docs),
        "context_utilization": context_utilization(response, docs),
        "completeness": completeness(response, ground_truth_answer),
        "adherence": adherence(response, docs),
        "response_time": time_taken,
    }
    return predicted_metrics, ground_truth_metrics

def retrieve_ground_truths(question, ragbench_set):
    # Iterate through all datasets and their splits (train, test, validation)
    for dataset_name in ragbench_set.keys():
        for split_name, instances in ragbench_set[dataset_name].items():
            print(f"Processing {split_name} split")
            for instance in instances:
                # Check if this instance's question matches the query
                if instance['question'] == question:
                    # Match found: collect the annotated ground-truth scores
                    ground_truth_metrics = {
                        "context_relevance": instance['relevance_score'],
                        "context_utilization": instance['utilization_score'],
                        "completeness": instance['completeness_score'],
                        "adherence": instance['adherence_score'],
                    }
                    ground_truth_answer = instance['response']
                    print(f"Match found in {split_name} split!")
                    print(f"ID: {instance['id']}, Response: {ground_truth_answer}")
                    # Return on the first match so the result reaches the
                    # caller instead of breaking out of the inner loop only
                    return ground_truth_answer, ground_truth_metrics
    # Fall back to empty values when the question is not found in any split
    return '', {}
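
# Example of the return contract (illustrative; the lookup only succeeds
# when the question string actually exists in a loaded split):
#   answer, gt_metrics = retrieve_ground_truths(question, load_ragbench())
#   # -> ('', {}) when no split contains the question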

# Step 1: Helper function to compute cosine similarity
def compute_cosine_similarity(text1, text2):
    # Guard against empty inputs (e.g. a missing ground-truth answer),
    # which would give TfidfVectorizer an empty vocabulary and raise
    if not text1.strip() or not text2.strip():
        return 0.0
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([text1, text2])
    return cosine_similarity(vectors[0], vectors[1])[0][0]
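
# Illustrative sanity check (made-up strings, not RAGBench data): texts
# sharing vocabulary score well above texts sharing none, e.g.
#   compute_cosine_similarity("the cat sat", "the cat sat on the mat")  # high, roughly 0.7
#   compute_cosine_similarity("the cat sat", "quarterly revenue grew")  # 0.0, no shared terms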

# Step 2: Metric 1 - Context Relevance
def context_relevance(question, relevant_documents):
    combined_docs = " ".join([doc.page_content for doc in relevant_documents])
    return compute_cosine_similarity(question, combined_docs)

# Step 3: Metric 2 - Context Utilization
def context_utilization(response, relevant_documents):
    combined_docs = " ".join([doc.page_content for doc in relevant_documents])
    return compute_cosine_similarity(response, combined_docs)

# Step 4: Metric 3 - Completeness
def completeness(response, ground_truth_answer):
    return compute_cosine_similarity(response, ground_truth_answer)

# Step 5: Metric 4 - Adherence
def adherence(response, relevant_documents):
    combined_docs = " ".join([doc.page_content for doc in relevant_documents])
    response_tokens = set(response.split())
    if not response_tokens:
        # An empty response has no tokens to support; avoid division by zero
        return 0.0
    relevant_tokens = set(combined_docs.split())
    supported_tokens = response_tokens.intersection(relevant_tokens)
    return len(supported_tokens) / len(response_tokens)
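
# Worked example (made-up strings): for the response "paris is the capital"
# and a document containing "paris is the capital of france", all 4 response
# tokens appear in the document, so adherence is 4/4 = 1.0; appending an
# unsupported token such as "probably" drops it to 4/5 = 0.8.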

# Step 6: Compute RMSE for metrics
def compute_rmse(predicted_values, ground_truth_values):
    return np.sqrt(mean_squared_error(ground_truth_values, predicted_values))
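
# Minimal end-to-end sketch of how these pieces fit together. Everything
# below is illustrative: the Doc class is a hypothetical stand-in for a
# retrieved document with a .page_content attribute (as in LangChain), and
# the question/response strings are made up, so the ground-truth lookup
# only succeeds if the question really exists in the loaded RAGBench data.
if __name__ == "__main__":
    from dataclasses import dataclass

    @dataclass
    class Doc:
        page_content: str

    question = "What is the capital of France?"   # hypothetical query
    response = "Paris is the capital of France."  # hypothetical RAG answer
    docs = [Doc("Paris is the capital and largest city of France.")]
    time_taken = 0.42  # seconds, placeholder timing

    predicted, ground_truth = calculate_metrics(question, response, docs, time_taken)
    print("Predicted:", predicted)

    # Compare predicted scores to the annotated ones over their shared keys
    if ground_truth:
        keys = [k for k in ground_truth if k in predicted]
        rmse = compute_rmse([predicted[k] for k in keys],
                            [ground_truth[k] for k in keys])
        print(f"RMSE vs ground truth: {rmse:.3f}")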