Spaces:
Running
Running
Update evaluate_performance.py
Browse files
- evaluate_performance.py +2 -13
evaluate_performance.py
CHANGED
|
@@ -23,15 +23,13 @@ import argparse
|
|
| 23 |
from datetime import datetime
|
| 24 |
import matplotlib.pyplot as plt
|
| 25 |
from tabulate import tabulate
|
| 26 |
-
import numpy as np
|
| 27 |
|
| 28 |
# Add the parent directory to sys.path if this script is run directly
|
| 29 |
if __name__ == "__main__":
|
| 30 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 31 |
|
| 32 |
-
# Import the agent
|
| 33 |
import agent
|
| 34 |
-
from utils.performance import PerformanceTracker
|
| 35 |
from utils.models import initialize_models
|
| 36 |
|
| 37 |
# IMPORTANT NOTE FOR DEVELOPERS:
|
|
@@ -193,8 +191,6 @@ def evaluate_claims(test_claims, eval_agent, limit=None):
|
|
| 193 |
- results (list): Detailed results for each claim
|
| 194 |
- metrics (dict): Aggregated performance metrics
|
| 195 |
"""
|
| 196 |
-
# Initialize performance tracker
|
| 197 |
-
performance_tracker = PerformanceTracker()
|
| 198 |
|
| 199 |
# Limit the number of claims if requested
|
| 200 |
if limit and limit > 0:
|
|
@@ -499,7 +495,6 @@ def main():
|
|
| 499 |
|
| 500 |
# Evaluate claims
|
| 501 |
results, metrics = evaluate_claims(TEST_CLAIMS, eval_agent, args.limit)
|
| 502 |
-
# results, metrics = evaluate_claims(TEST_CLAIMS, eval_agent, 1)
|
| 503 |
|
| 504 |
# Print summary
|
| 505 |
print_summary(metrics)
|
|
@@ -508,13 +503,7 @@ def main():
|
|
| 508 |
save_results(results, metrics, output_file)
|
| 509 |
|
| 510 |
# Create charts
|
| 511 |
-
try:
|
| 512 |
-
from tabulate import tabulate
|
| 513 |
-
import matplotlib.pyplot as plt
|
| 514 |
-
create_charts(metrics, results_dir)
|
| 515 |
-
except ImportError:
|
| 516 |
-
print("\nCould not create charts. Please install matplotlib and tabulate packages:")
|
| 517 |
-
print("pip install matplotlib tabulate")
|
| 518 |
|
| 519 |
if __name__ == "__main__":
|
| 520 |
main()
|
|
|
|
| 23 |
from datetime import datetime
|
| 24 |
import matplotlib.pyplot as plt
|
| 25 |
from tabulate import tabulate
|
|
|
|
| 26 |
|
| 27 |
# Add the parent directory to sys.path if this script is run directly
|
| 28 |
if __name__ == "__main__":
|
| 29 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 30 |
|
| 31 |
+
# Import the agent
|
| 32 |
import agent
|
|
|
|
| 33 |
from utils.models import initialize_models
|
| 34 |
|
| 35 |
# IMPORTANT NOTE FOR DEVELOPERS:
|
|
|
|
| 191 |
- results (list): Detailed results for each claim
|
| 192 |
- metrics (dict): Aggregated performance metrics
|
| 193 |
"""
|
|
|
|
|
|
|
| 194 |
|
| 195 |
# Limit the number of claims if requested
|
| 196 |
if limit and limit > 0:
|
|
|
|
| 495 |
|
| 496 |
# Evaluate claims
|
| 497 |
results, metrics = evaluate_claims(TEST_CLAIMS, eval_agent, args.limit)
|
|
|
|
| 498 |
|
| 499 |
# Print summary
|
| 500 |
print_summary(metrics)
|
|
|
|
| 503 |
save_results(results, metrics, output_file)
|
| 504 |
|
| 505 |
# Create charts
|
| 506 |
+
create_charts(metrics, results_dir)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
|
| 508 |
if __name__ == "__main__":
|
| 509 |
main()
|