import time
import functools
import json
from typing import Callable, Any, Dict, List

import torch
import psutil
from evaluate import load
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def time_function(func: Callable) -> Callable:
    """
    Decorator that reports a function's wall-clock execution time.
    """
    @functools.wraps(func)  # preserve the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        print(f"{func.__name__} took {end_time - start_time:.2f} seconds to execute")
        return result
    return wrapper
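
# Illustrative sketch of the decorator in action; `_demo_timing` and
# `slow_square` are invented for this example and are not part of the
# original module.
def _demo_timing() -> None:
    @time_function
    def slow_square(x: int) -> int:
        time.sleep(0.5)
        return x * x

    slow_square(4)  # prints something like "slow_square took 0.50 seconds to execute"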
def evaluate_response(generated_response: str, ground_truth: str = None) -> Dict[str, Any]:
    """
    Evaluate a generated response with BLEU, ROUGE-L, and word overlap.
    """
    results = {
        "length": len(generated_response),
        "word_count": len(generated_response.split())
    }
    if ground_truth:
        bleu = load("bleu")
        rouge = load("rouge")
        bleu_score = bleu.compute(predictions=[generated_response], references=[[ground_truth]])
        rouge_score = rouge.compute(predictions=[generated_response], references=[ground_truth])
        # Bag-of-words overlap: fraction of ground-truth words that appear in the response
        generated_words = set(generated_response.lower().split())
        ground_truth_words = set(ground_truth.lower().split())
        overlap = len(generated_words.intersection(ground_truth_words))
        results.update({
            "bleu": bleu_score["bleu"],
            "rouge": rouge_score["rougeL"],
            "word_overlap": overlap / len(ground_truth_words) if ground_truth_words else 0
        })
    return results
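
# Illustrative sketch of evaluate_response; the strings below are invented
# sample data, not fixtures from the original project.
def _demo_evaluate_response() -> None:
    scores = evaluate_response(
        generated_response="You can reset your password from the settings page.",
        ground_truth="Reset your password via the settings page."
    )
    # e.g. {'length': ..., 'word_count': ..., 'bleu': ..., 'rouge': ..., 'word_overlap': ...}
    print(scores)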
def evaluate_retrieval(embedder, test_set_path: str, k: int = 3) -> Dict[str, float]:
    """
    Evaluate retrieval quality with Precision@k and Recall@k.
    """
    with open(test_set_path, 'r') as f:
        test_set = json.load(f)
    precision, recall = [], []
    for item in test_set:
        query = item['query']
        true_ids = set(item['relevant_ids'])
        retrieved_faqs = embedder.retrieve_relevant_faqs(query, k)
        # Match the retrieved FAQs against the annotated relevant_ids.
        # Assumes each retrieved FAQ dict carries its corpus 'id'; positional
        # indices (0..k-1) would not identify which FAQs were actually returned.
        retrieved_ids = {faq['id'] for faq in retrieved_faqs}
        true_positives = len(true_ids & retrieved_ids)
        precision.append(true_positives / k if k > 0 else 0)
        recall.append(true_positives / len(true_ids) if true_ids else 0)
    return {
        "Precision@k": sum(precision) / len(precision) if precision else 0,
        "Recall@k": sum(recall) / len(recall) if recall else 0
    }
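
# Assumed shape of the retrieval test-set JSON, inferred from the keys the
# loop above reads ('query' and 'relevant_ids'); the entries are invented:
#
#     [
#         {"query": "How do I reset my password?", "relevant_ids": [0, 7]},
#         {"query": "What payment methods do you accept?", "relevant_ids": [3]}
#     ]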
def baseline_keyword_search(query: str, faqs: List[Dict[str, Any]], k: int = 3) -> List[Dict[str, Any]]:
    """
    Keyword-based search baseline using TF-IDF over FAQ questions.
    """
    questions = [faq['question'] for faq in faqs]
    vectorizer = TfidfVectorizer()
    question_vectors = vectorizer.fit_transform(questions)
    query_vector = vectorizer.transform([query])
    similarities = cosine_similarity(query_vector, question_vectors).flatten()
    # argsort is ascending, so take the last k indices and reverse for top-k
    top_k_indices = similarities.argsort()[-k:][::-1]
    return [faqs[i] for i in top_k_indices]
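
# Illustrative sketch of the TF-IDF baseline; the FAQ entries are invented
# sample data.
def _demo_keyword_search() -> None:
    faqs = [
        {"question": "How do I reset my password?", "answer": "Use the settings page."},
        {"question": "How do I delete my account?", "answer": "Contact support."},
        {"question": "What payment methods do you accept?", "answer": "Cards and PayPal."},
    ]
    top = baseline_keyword_search("forgot my password", faqs, k=2)
    print([faq["question"] for faq in top])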
def format_memory_stats() -> Dict[str, str]:
    """
    Format RAM and GPU memory usage statistics.
    """
    vm = psutil.virtual_memory()  # snapshot once so the readings are consistent
    system_stats = {
        "RAM": f"{vm.used / (1024 ** 3):.1f}GB / {vm.total / (1024 ** 3):.1f}GB",
        "RAM Usage": f"{vm.percent}%"
    }
    if torch.cuda.is_available():
        gpu_stats = {}
        for i in range(torch.cuda.device_count()):
            total = torch.cuda.get_device_properties(i).total_memory
            gpu_stats[f"GPU {i}"] = torch.cuda.get_device_name(i)
            gpu_stats[f"GPU {i} Memory"] = (
                f"{torch.cuda.memory_allocated(i) / (1024 ** 3):.1f}GB / "
                f"{total / (1024 ** 3):.1f}GB"
            )
        system_stats.update(gpu_stats)
    return system_stats
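
# Illustrative usage; the values printed depend entirely on the host machine.
if __name__ == "__main__":
    for key, value in format_memory_stats().items():
        print(f"{key}: {value}")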