import numpy as np
from sklearn.metrics import mean_squared_error, roc_auc_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from data_processing import load_ragbench

ground_truth_answer = ''
ground_truth_metrics = {}


def calculate_metrics(question, response, docs, time_taken):
    data = load_ragbench()
    retrieve_ground_truths(question, data)

    # Predicted metrics
    predicted_metrics = {
        "context_relevance": context_relevance(question, docs),
        "context_utilization": context_utilization(response, docs),
        "completeness": completeness(response, ground_truth_answer),
        "adherence": adherence(response, docs),
        "response_time": time_taken
    }
    return predicted_metrics


def retrieve_ground_truths(question, ragbench_set):
    # Write the matched reference answer and scores into the module-level globals
    # so calculate_metrics can read them after this call.
    global ground_truth_answer, ground_truth_metrics

    # Iterate through all datasets and their splits (train, test, validation)
    for dataset_name in ragbench_set.keys():
        for split_name, instances in ragbench_set[dataset_name].items():
            print(f"Processing {split_name} split")
            for instance in instances:
                # Check if the stored question matches the query
                if instance['question'] == question:
                    # If a match is found, retrieve the id, response and reference scores
                    instance_id = instance['id']
                    instance_response = instance['response']
                    ground_truth_metrics = {
                        "context_relevance": instance['relevance_score'],
                        "context_utilization": instance['utilization_score'],
                        "completeness": instance['completeness_score'],
                        "adherence": instance['adherence_score']
                    }
                    ground_truth_answer = instance_response
                    print(f"Match found in {split_name} split!")
                    print(f"ID: {instance_id}, Response: {instance_response}")
                    return  # Stop after the first match


# Step 1: Helper function to compute cosine similarity between two texts (TF-IDF vectors)
def compute_cosine_similarity(text1, text2):
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([text1, text2])
    return cosine_similarity(vectors[0], vectors[1])[0][0]


# Step 2: Metric 1 - Context Relevance: similarity between the question and the retrieved documents
def context_relevance(question, relevant_documents):
    combined_docs = " ".join([doc.page_content for doc in relevant_documents])
    return compute_cosine_similarity(question, combined_docs)


# Step 3: Metric 2 - Context Utilization: similarity between the response and the retrieved documents
def context_utilization(response, relevant_documents):
    combined_docs = " ".join([doc.page_content for doc in relevant_documents])
    return compute_cosine_similarity(response, combined_docs)


# Step 4: Metric 3 - Completeness: similarity between the response and the ground-truth answer
def completeness(response, ground_truth_answer):
    return compute_cosine_similarity(response, ground_truth_answer)


# Step 5: Metric 4 - Adherence: fraction of response tokens supported by the retrieved documents
def adherence(response, relevant_documents):
    combined_docs = " ".join([doc.page_content for doc in relevant_documents])
    response_tokens = set(response.split())
    relevant_tokens = set(combined_docs.split())
    supported_tokens = response_tokens.intersection(relevant_tokens)
    if not response_tokens:
        return 0.0
    return len(supported_tokens) / len(response_tokens)


# Step 6: Compute RMSE between predicted and ground-truth metric values
def compute_rmse(predicted_values, ground_truth_values):
    return np.sqrt(mean_squared_error(ground_truth_values, predicted_values))
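

# Minimal usage sketch (illustrative only, not part of the evaluation pipeline): the
# question, documents, response, and reference scores below are hypothetical placeholders,
# and the small Document dataclass merely stands in for whatever retriever object supplies
# a `.page_content` attribute. It exercises the metric functions above and shows how
# compute_rmse would compare predicted scores against reference scores such as RAGBench's.
if __name__ == "__main__":
    from dataclasses import dataclass

    @dataclass
    class Document:
        page_content: str

    sample_question = "What is the capital of France?"                             # hypothetical query
    sample_docs = [Document("Paris is the capital and largest city of France.")]   # hypothetical retrieval
    sample_response = "The capital of France is Paris."                            # hypothetical generated answer
    sample_reference = "Paris is the capital of France."                           # hypothetical ground-truth answer

    predicted = {
        "context_relevance": context_relevance(sample_question, sample_docs),
        "context_utilization": context_utilization(sample_response, sample_docs),
        "completeness": completeness(sample_response, sample_reference),
        "adherence": adherence(sample_response, sample_docs),
    }
    print(predicted)

    # Hypothetical reference scores, listed in the same key order as `predicted`.
    reference_scores = [1.0, 1.0, 1.0, 1.0]
    print("RMSE vs. reference:", compute_rmse(list(predicted.values()), reference_scores))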