devjas1 committed
Commit 05d496e · 1 Parent(s): 7779f44

(FEAT)[Performance Analytics]: Implement performance tracking utility and dashboard


- Introduced 'PerformanceMetrics' dataclass to encapsulate the fields logged for each inference.
- Added 'get_performance_tracker()' function that provides a singleton tracker instance.
- Implemented 'PerformanceTracker' class to log, store, and retrieve performance metrics in a SQLite database (see the usage sketch below).
- Added methods for logging metrics, aggregating per-model statistics, and exporting analytics reports as JSON or CSV.
- Created 'display_performance_dashboard()' for Streamlit integration, visualizing metrics (inference times, confidence scores, memory usage) with charts and tables.
- Included error handling and database connection management for robustness.
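For context, a minimal usage sketch of the tracking API introduced here. The model name, spectrum, and preprocessing step below are illustrative placeholders, not part of this commit; fields the caller does not set fall back to the defaults in 'track_inference' (e.g., confidence 0.0).

```python
import time

import numpy as np

from utils.performance_tracker import get_performance_tracker

tracker = get_performance_tracker()  # singleton backed by outputs/performance_tracking.db

spectrum = np.random.rand(500)  # placeholder for a loaded Raman spectrum

# Total time and memory delta are measured and logged automatically when the block exits;
# optional fields (preprocessing_time, confidence, input_size, ...) can be set by the caller.
with tracker.track_inference(model_name="example_model", modality="raman") as run:
    t0 = time.time()
    smoothed = np.convolve(spectrum, np.ones(5) / 5, mode="same")  # placeholder preprocessing
    run["preprocessing_time"] = time.time() - t0

    run["confidence"] = float(smoothed.mean())  # placeholder for a model's confidence output
    run["input_size"] = int(spectrum.size)

print(tracker.get_model_statistics("example_model"))
```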

Files changed (1)
  1. utils/performance_tracker.py +404 -0
utils/performance_tracker.py ADDED
@@ -0,0 +1,404 @@
+ """Performance tracking and logging utilities for POLYMEROS platform."""
+
+ import time
+ import json
+ import sqlite3
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Dict, List, Any, Optional
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import streamlit as st
+ from dataclasses import dataclass, asdict
+ from contextlib import contextmanager
+
+
+ @dataclass
+ class PerformanceMetrics:
+     """Data class for performance metrics."""
+
+     model_name: str
+     prediction_time: float
+     preprocessing_time: float
+     total_time: float
+     memory_usage_mb: float
+     accuracy: Optional[float]
+     confidence: float
+     timestamp: str
+     input_size: int
+     modality: str
+
+     def to_dict(self) -> Dict[str, Any]:
+         return asdict(self)
+
+
+ class PerformanceTracker:
+     """Automatic performance tracking and logging system."""
+
+     def __init__(self, db_path: str = "outputs/performance_tracking.db"):
+         self.db_path = Path(db_path)
+         self.db_path.parent.mkdir(parents=True, exist_ok=True)
+         self._init_database()
+
+     def _init_database(self):
+         """Initialize SQLite database for performance tracking."""
+         with sqlite3.connect(self.db_path) as conn:
+             conn.execute(
+                 """
+                 CREATE TABLE IF NOT EXISTS performance_metrics (
+                     id INTEGER PRIMARY KEY AUTOINCREMENT,
+                     model_name TEXT NOT NULL,
+                     prediction_time REAL NOT NULL,
+                     preprocessing_time REAL NOT NULL,
+                     total_time REAL NOT NULL,
+                     memory_usage_mb REAL,
+                     accuracy REAL,
+                     confidence REAL NOT NULL,
+                     timestamp TEXT NOT NULL,
+                     input_size INTEGER NOT NULL,
+                     modality TEXT NOT NULL
+                 )
+                 """
+             )
+             conn.commit()
+
+     def log_performance(self, metrics: PerformanceMetrics):
+         """Log performance metrics to database."""
+         with sqlite3.connect(self.db_path) as conn:
+             conn.execute(
+                 """
+                 INSERT INTO performance_metrics
+                 (model_name, prediction_time, preprocessing_time, total_time,
+                  memory_usage_mb, accuracy, confidence, timestamp, input_size, modality)
+                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                 """,
+                 (
+                     metrics.model_name,
+                     metrics.prediction_time,
+                     metrics.preprocessing_time,
+                     metrics.total_time,
+                     metrics.memory_usage_mb,
+                     metrics.accuracy,
+                     metrics.confidence,
+                     metrics.timestamp,
+                     metrics.input_size,
+                     metrics.modality,
+                 ),
+             )
+             conn.commit()
+
+     @contextmanager
+     def track_inference(self, model_name: str, modality: str = "raman"):
+         """Context manager for automatic performance tracking."""
+         start_time = time.time()
+         start_memory = self._get_memory_usage()
+
+         tracking_data = {
+             "model_name": model_name,
+             "modality": modality,
+             "start_time": start_time,
+             "start_memory": start_memory,
+             "preprocessing_time": 0.0,
+         }
+
+         try:
+             yield tracking_data
+         finally:
+             end_time = time.time()
+             end_memory = self._get_memory_usage()
+
+             total_time = end_time - start_time
+             memory_usage = max(end_memory - start_memory, 0)
+
+             # Create metrics object if not provided
+             if "metrics" not in tracking_data:
+                 metrics = PerformanceMetrics(
+                     model_name=model_name,
+                     prediction_time=tracking_data.get("prediction_time", total_time),
+                     preprocessing_time=tracking_data.get("preprocessing_time", 0.0),
+                     total_time=total_time,
+                     memory_usage_mb=memory_usage,
+                     accuracy=tracking_data.get("accuracy"),
+                     confidence=tracking_data.get("confidence", 0.0),
+                     timestamp=datetime.now().isoformat(),
+                     input_size=tracking_data.get("input_size", 0),
+                     modality=modality,
+                 )
+                 self.log_performance(metrics)
+
+     def _get_memory_usage(self) -> float:
+         """Get current memory usage in MB."""
+         try:
+             import psutil
+
+             process = psutil.Process()
+             return process.memory_info().rss / 1024 / 1024  # Convert to MB
+         except ImportError:
+             return 0.0  # psutil not available
+
+     def get_recent_metrics(self, limit: int = 100) -> List[Dict[str, Any]]:
+         """Get recent performance metrics."""
+         with sqlite3.connect(self.db_path) as conn:
+             conn.row_factory = sqlite3.Row  # Enable column access by name
+             cursor = conn.execute(
+                 """
+                 SELECT * FROM performance_metrics
+                 ORDER BY timestamp DESC
+                 LIMIT ?
+                 """,
+                 (limit,),
+             )
+             return [dict(row) for row in cursor.fetchall()]
+
+     def get_model_statistics(self, model_name: Optional[str] = None) -> Dict[str, Any]:
+         """Get statistical summary of model performance."""
+         where_clause = "WHERE model_name = ?" if model_name else ""
+         params = (model_name,) if model_name else ()
+
+         with sqlite3.connect(self.db_path) as conn:
+             cursor = conn.execute(
+                 f"""
+                 SELECT
+                     model_name,
+                     COUNT(*) as total_inferences,
+                     AVG(prediction_time) as avg_prediction_time,
+                     AVG(preprocessing_time) as avg_preprocessing_time,
+                     AVG(total_time) as avg_total_time,
+                     AVG(memory_usage_mb) as avg_memory_usage,
+                     AVG(confidence) as avg_confidence,
+                     MIN(total_time) as fastest_inference,
+                     MAX(total_time) as slowest_inference
+                 FROM performance_metrics
+                 {where_clause}
+                 GROUP BY model_name
+                 """,
+                 params,
+             )
+
+             results = cursor.fetchall()
+             if model_name and results:
+                 # Return single model stats as dict
+                 row = results[0]
+                 return {
+                     "model_name": row[0],
+                     "total_inferences": row[1],
+                     "avg_prediction_time": row[2],
+                     "avg_preprocessing_time": row[3],
+                     "avg_total_time": row[4],
+                     "avg_memory_usage": row[5],
+                     "avg_confidence": row[6],
+                     "fastest_inference": row[7],
+                     "slowest_inference": row[8],
+                 }
+             elif not model_name:
+                 # Return all models stats as dict of dicts
+                 return {
+                     row[0]: {
+                         "model_name": row[0],
+                         "total_inferences": row[1],
+                         "avg_prediction_time": row[2],
+                         "avg_preprocessing_time": row[3],
+                         "avg_total_time": row[4],
+                         "avg_memory_usage": row[5],
+                         "avg_confidence": row[6],
+                         "fastest_inference": row[7],
+                         "slowest_inference": row[8],
+                     }
+                     for row in results
+                 }
+             else:
+                 return {}
+
+     def create_performance_visualization(self) -> Optional[plt.Figure]:
+         """Create performance visualization charts."""
+         metrics = self.get_recent_metrics(50)
+
+         if not metrics:
+             return None
+
+         fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(12, 8))
+
+         # Convert to convenient format
+         models = [m["model_name"] for m in metrics]
+         times = [m["total_time"] for m in metrics]
+         confidences = [m["confidence"] for m in metrics]
+         timestamps = [datetime.fromisoformat(m["timestamp"]) for m in metrics]
+
+         # 1. Inference Time Over Time
+         ax1.plot(timestamps, times, "o-", alpha=0.7)
+         ax1.set_title("Inference Time Over Time")
+         ax1.set_ylabel("Time (seconds)")
+         ax1.tick_params(axis="x", rotation=45)
+
+         # 2. Performance by Model
+         model_stats = self.get_model_statistics()
+         if model_stats:
+             model_names = list(model_stats.keys())
+             avg_times = [model_stats[m]["avg_total_time"] for m in model_names]
+
+             ax2.bar(model_names, avg_times, alpha=0.7)
+             ax2.set_title("Average Inference Time by Model")
+             ax2.set_ylabel("Time (seconds)")
+             ax2.tick_params(axis="x", rotation=45)
+
+         # 3. Confidence Distribution
+         ax3.hist(confidences, bins=20, alpha=0.7)
+         ax3.set_title("Confidence Score Distribution")
+         ax3.set_xlabel("Confidence")
+         ax3.set_ylabel("Frequency")
+
+         # 4. Memory Usage if available
+         memory_usage = [
+             m["memory_usage_mb"] for m in metrics if m["memory_usage_mb"] is not None
+         ]
+         if memory_usage:
+             ax4.plot(range(len(memory_usage)), memory_usage, "o-", alpha=0.7)
+             ax4.set_title("Memory Usage")
+             ax4.set_xlabel("Inference Number")
+             ax4.set_ylabel("Memory (MB)")
+         else:
+             ax4.text(
+                 0.5,
+                 0.5,
+                 "Memory tracking\nnot available",
+                 ha="center",
+                 va="center",
+                 transform=ax4.transAxes,
+             )
+             ax4.set_title("Memory Usage")
+
+         plt.tight_layout()
+         return fig
+
+     def export_metrics(self, format: str = "json") -> str:
+         """Export performance metrics in specified format."""
+         metrics = self.get_recent_metrics(1000)  # Get more for export
+
+         if format == "json":
+             return json.dumps(metrics, indent=2, default=str)
+         elif format == "csv":
+             import pandas as pd
+
+             df = pd.DataFrame(metrics)
+             return df.to_csv(index=False)
+         else:
+             raise ValueError(f"Unsupported format: {format}")
+
+
+ # Global tracker instance
+ _tracker = None
+
+
+ def get_performance_tracker() -> PerformanceTracker:
+     """Get global performance tracker instance."""
+     global _tracker
+     if _tracker is None:
+         _tracker = PerformanceTracker()
+     return _tracker
+
+
+ def display_performance_dashboard():
+     """Display performance tracking dashboard in Streamlit."""
+     tracker = get_performance_tracker()
+
+     st.markdown("### 📈 Performance Dashboard")
+
+     # Recent metrics summary
+     recent_metrics = tracker.get_recent_metrics(20)
+
+     if not recent_metrics:
+         st.info(
+             "No performance data available yet. Run some inferences to see metrics."
+         )
+         return
+
+     # Summary statistics
+     col1, col2, col3, col4 = st.columns(4)
+
+     total_inferences = len(recent_metrics)
+     avg_time = np.mean([m["total_time"] for m in recent_metrics])
+     avg_confidence = np.mean([m["confidence"] for m in recent_metrics])
+     unique_models = len(set(m["model_name"] for m in recent_metrics))
+
+     with col1:
+         st.metric("Total Inferences", total_inferences)
+     with col2:
+         st.metric("Avg Time", f"{avg_time:.3f}s")
+     with col3:
+         st.metric("Avg Confidence", f"{avg_confidence:.3f}")
+     with col4:
+         st.metric("Models Used", unique_models)
+
+     # Performance visualization
+     fig = tracker.create_performance_visualization()
+     if fig:
+         st.pyplot(fig)
+
+     # Model comparison table
+     st.markdown("#### Model Performance Comparison")
+     model_stats = tracker.get_model_statistics()
+
+     if model_stats:
+         import pandas as pd
+
+         stats_data = []
+         for model_name, stats in model_stats.items():
+             stats_data.append(
+                 {
+                     "Model": model_name,
+                     "Total Inferences": stats["total_inferences"],
+                     "Avg Time (s)": f"{stats['avg_total_time']:.3f}",
+                     "Avg Confidence": f"{stats['avg_confidence']:.3f}",
+                     "Fastest (s)": f"{stats['fastest_inference']:.3f}",
+                     "Slowest (s)": f"{stats['slowest_inference']:.3f}",
+                 }
+             )
+
+         df = pd.DataFrame(stats_data)
+         st.dataframe(df, use_container_width=True)
+
+     # Export options
+     with st.expander("📥 Export Performance Data"):
+         col1, col2 = st.columns(2)
+
+         with col1:
+             if st.button("Export JSON"):
+                 json_data = tracker.export_metrics("json")
+                 st.download_button(
+                     "Download JSON",
+                     json_data,
+                     "performance_metrics.json",
+                     "application/json",
+                 )
+
+         with col2:
+             if st.button("Export CSV"):
+                 csv_data = tracker.export_metrics("csv")
+                 st.download_button(
+                     "Download CSV", csv_data, "performance_metrics.csv", "text/csv"
+                 )
+
+
+ if __name__ == "__main__":
+     # Test the performance tracker
+     tracker = PerformanceTracker()
+
+     # Simulate some metrics
+     for i in range(5):
+         metrics = PerformanceMetrics(
+             model_name=f"test_model_{i%2}",
+             prediction_time=0.1 + i * 0.01,
+             preprocessing_time=0.05,
+             total_time=0.15 + i * 0.01,
+             memory_usage_mb=100 + i * 10,
+             accuracy=0.8 + i * 0.02,
+             confidence=0.7 + i * 0.05,
+             timestamp=datetime.now().isoformat(),
+             input_size=500,
+             modality="raman",
+         )
+         tracker.log_performance(metrics)
+
+     print("Performance tracking test completed!")
+     print(f"Recent metrics: {len(tracker.get_recent_metrics())}")
+     print(f"Model stats: {tracker.get_model_statistics()}")
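A minimal sketch of wiring the dashboard into a Streamlit page follows. The page module name is an assumption for illustration and is not part of this commit.

```python
# Hypothetical Streamlit page (file name/location is an assumption, e.g. pages/performance.py).
import streamlit as st

from utils.performance_tracker import display_performance_dashboard

st.set_page_config(page_title="Performance Analytics", layout="wide")

# Renders the summary metrics, charts, model comparison table, and export buttons
# implemented in utils/performance_tracker.py.
display_performance_dashboard()
```

Such a page could be launched directly with `streamlit run`, or dropped into the app's existing pages/ directory if one is in use.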