Spaces:
Sleeping
Sleeping
New changes
Browse files
app.py
CHANGED
|
@@ -588,9 +588,9 @@ def evaluate_rag_pipeline(domain, q_indices):
|
|
| 588 |
return round(np.sqrt(np.mean((np.array(gt) - np.array(pred)) ** 2)), 4)
|
| 589 |
|
| 590 |
result = {
|
| 591 |
-
"Context Relevance": compute_rmse(gt_relevance, pred_relevance),
|
| 592 |
-
"Context Utilization": compute_rmse(gt_utilization, pred_utilization),
|
| 593 |
-
"Completeness": compute_rmse(gt_completeness, pred_completeness),
|
| 594 |
}
|
| 595 |
|
| 596 |
if len(set(gt_adherence)) == 2:
|
|
@@ -636,7 +636,7 @@ iface = gr.Interface(
|
|
| 636 |
gr.JSON(label="Evaluation Metrics (RMSE & AUC-ROC)"),
|
| 637 |
gr.Textbox(label="Execution Log", lines=10, interactive=True),
|
| 638 |
],
|
| 639 |
-
title="RAG Evaluation Dashboard",
|
| 640 |
description="Evaluate your RAG pipeline across selected queries using LLM-based generation and judgment."
|
| 641 |
)
|
| 642 |
|
|
|
|
| 588 |
return round(np.sqrt(np.mean((np.array(gt) - np.array(pred)) ** 2)), 4)
|
| 589 |
|
| 590 |
result = {
|
| 591 |
+
"RMSE Context Relevance": compute_rmse(gt_relevance, pred_relevance),
|
| 592 |
+
"RMSE Context Utilization": compute_rmse(gt_utilization, pred_utilization),
|
| 593 |
+
"RMSE Completeness": compute_rmse(gt_completeness, pred_completeness),
|
| 594 |
}
|
| 595 |
|
| 596 |
if len(set(gt_adherence)) == 2:
|
|
|
|
| 636 |
gr.JSON(label="Evaluation Metrics (RMSE & AUC-ROC)"),
|
| 637 |
gr.Textbox(label="Execution Log", lines=10, interactive=True),
|
| 638 |
],
|
| 639 |
+
title=" RAG Evaluation Dashboard",
|
| 640 |
description="Evaluate your RAG pipeline across selected queries using LLM-based generation and judgment."
|
| 641 |
)
|
| 642 |
|