import numpy as np
from sklearn.metrics import mean_squared_error, roc_auc_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from data_processing import load_query_dataset
# Module-level defaults for the ground-truth answer and metric scores
ground_truth_answer = ''
ground_truth_metrics = {}
# def calculate_metrics(question, response, docs, time_taken):
#     data = load_ragbench()
#     retrieve_ground_truths(question, data)
#     # Predicted metrics
#     predicted_metrics = {
#         "ground_truth": ground_truth_answer,
#         "context_relevance": context_relevance(question, docs),
#         "context_utilization": context_utilization(response, docs),
#         "completeness": completeness(response, ground_truth_answer),
#         "adherence": adherence(response, docs),
#         "response_time": time_taken
#     }
#     return predicted_metrics

# def retrieve_ground_truths(question, ragbench_set):
#     for dataset_name in ragbench_set.keys():
#         for split_name, instances in ragbench_set[dataset_name].items():
#             print(f"Processing {split_name} split")
#             for instance in instances:
#                 # Check if the question matches the query
#                 if instance['question'] == question:
#                     # If a match is found, retrieve id and response
#                     instance_id = instance['id']
#                     instance_response = instance['response']
#                     ground_truth_metrics = {
#                         "context_relevance": instance['relevance_score'],
#                         "context_utilization": instance['utilization_score'],
#                         "completeness": instance['completeness_score'],
#                         "adherence": instance['adherence_score']
#                     }
#                     ground_truth_answer = instance_response
#                     print(f"Match found in {split_name} split!")
#                     print(f"ID: {instance_id}, Response: {instance_response}")
#                     break  # Exit after finding the first match (optional)
# Step 1: Helper function to compute cosine similarity
def compute_cosine_similarity(text1, text2):
    if not text1 or not text2:  # Check for empty or None values
        print("Error: One or both input texts are empty. Returning similarity as 0.")
        return 0.0
    vectorizer = TfidfVectorizer(stop_words="english")
    try:
        vectors = vectorizer.fit_transform([text1, text2])
        similarity = cosine_similarity(vectors[0], vectors[1])[0][0]
        return similarity
    except ValueError as e:
        print(f"Error in vectorization: {e}. Returning similarity as 0.")
        return 0.0
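
# Example (illustrative strings, not from the dataset): sentences sharing most
# content words score near 1.0 after English stop words are removed, while
# unrelated sentences score 0.0.
#   compute_cosine_similarity("the cat sat on the mat", "a cat sat on a mat")  # ~1.0
#   compute_cosine_similarity("the cat sat on the mat", "stock prices fell")   # 0.0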
# Step 2: Metric 1 - Context Relevance
def context_relevance(question, relevant_documents):
    # combined_docs = " ".join([doc.page_content for doc in relevant_documents])
    combined_docs = " ".join(relevant_documents)
    return compute_cosine_similarity(question, combined_docs)

# Step 3: Metric 2 - Context Utilization
def context_utilization(response, relevant_documents):
    # combined_docs = " ".join([doc.page_content for doc in relevant_documents])
    combined_docs = " ".join(relevant_documents)
    return compute_cosine_similarity(response, combined_docs)
# Step 4: Metric 3 - Completeness
def completeness(response, ground_truth_answer):
    return compute_cosine_similarity(response, ground_truth_answer)
# Step 5: Metric 4 - Adherence
def adherence(response, relevant_documents):
    # combined_docs = " ".join([doc.page_content for doc in relevant_documents])
    combined_docs = " ".join(relevant_documents)
    response_tokens = set(response.split())
    relevant_tokens = set(combined_docs.split())
    if not response_tokens:  # Guard against an empty response (avoids division by zero)
        return 0.0
    supported_tokens = response_tokens.intersection(relevant_tokens)
    return len(supported_tokens) / len(response_tokens)
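
# Example (illustrative): for the response "port 8080 is used" and a document
# containing "the default port is 8080", the supported tokens are
# {"port", "8080", "is"}, so adherence = 3 / 4 = 0.75 (exact whitespace tokens,
# no stemming or normalization).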
# Step 6: Compute RMSE for metrics
def compute_rmse(predicted_values, ground_truth_values):
    return np.sqrt(mean_squared_error(ground_truth_values, predicted_values))
def retrieve_ground_truths(question, dataset):
    """Retrieve the ground-truth response and metric scores for a given question."""
    for split_name, instances in dataset.items():
        for instance in instances:
            if instance['question'] == question:
                ground_truth_metrics = {
                    "context_relevance": instance['relevance_score'],
                    "context_utilization": instance['utilization_score'],
                    "completeness": instance['completeness_score'],
                    "adherence": instance['adherence_score']
                }
                return instance['response'], ground_truth_metrics  # Return on the first match
    return None, None  # Return None values if no match is found
def calculate_metrics(question, q_dataset, response, docs, time_taken):
    data = load_query_dataset(q_dataset)
    ground_truth_answer, ground_truth_metrics = retrieve_ground_truths(question, data)
    # Default to an empty string if no ground truth is found
    if ground_truth_answer is None:
        ground_truth_answer = ""
    # Predicted metrics
    predicted_metrics = {
        "RAG_model_response": response,
        "ground_truth": ground_truth_answer,
        "context_relevance": context_relevance(question, docs),
        "context_utilization": context_utilization(response, docs),
        "completeness": completeness(response, ground_truth_answer),
        "adherence": adherence(response, docs),
        "response_time": time_taken
    }
    # If ground-truth metric scores were found, report RMSE between the predicted
    # and ground-truth values of the four shared metrics
    if ground_truth_metrics:
        metric_names = ["context_relevance", "context_utilization", "completeness", "adherence"]
        predicted_values = [predicted_metrics[name] for name in metric_names]
        ground_truth_values = [ground_truth_metrics[name] for name in metric_names]
        predicted_metrics['rmse'] = compute_rmse(predicted_values, ground_truth_values)
    return predicted_metrics
''' def retrieve_ground_truths(question, dataset):
    for split_name, instances in dataset.items():
        print(f"Processing {split_name} split")
        for instance in instances:
            if instance['question'] == question:
                instance_id = instance['id']
                instance_response = instance['response']
                # ground_truth_metrics = {
                #     "context_relevance": instance['relevance_score'],
                #     "context_utilization": instance['utilization_score'],
                #     "completeness": instance['completeness_score'],
                #     "adherence": instance['adherence_score']
                # }
                print(f"Match found in {split_name} split!")
                print(f"ID: {instance_id}, Response: {instance_response}")
                return instance_response  # Return ground truth response immediately
    return None  # Return None if no match is found
'''
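
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). The question, response, documents,
# and the "techqa" dataset name below are made-up placeholders, not values from
# the real query dataset; this block only exercises the pure text-based helpers
# so it can run without loading any dataset.
if __name__ == "__main__":
    question = "What port does the service listen on by default?"
    response = "The service listens on port 8080 by default."
    docs = [
        "By default the service listens on port 8080.",
        "Configuration values are stored in config.yaml.",
    ]

    print("context_relevance:", context_relevance(question, docs))
    print("context_utilization:", context_utilization(response, docs))
    print("adherence:", adherence(response, docs))
    print("rmse:", compute_rmse([0.8, 0.7, 0.6, 1.0], [0.9, 0.6, 0.65, 1.0]))

    # With a real query dataset available, the full pipeline would be:
    # metrics = calculate_metrics(question, "techqa", response, docs, time_taken=1.2)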