Skitzo-4152 committed on
Commit 2c223c8 · verified · 1 Parent(s): 0404350

Upload utils___init__.py

Files changed (1)
  1. utils/utils___init__.py +371 -0
utils/utils___init__.py ADDED
@@ -0,0 +1,371 @@
#!/usr/bin/env python3
"""
Utility functions for ChipVerifyAI
RTL parsing, metrics calculation, and visualization helpers
"""

import re
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

class RTLParser:
    """Parse RTL files to extract design features"""

    def __init__(self):
        # Regex patterns for RTL parsing
        self.patterns = {
            'module': r'\bmodule\s+(\w+)',
            'always_block': r'\balways\s*[@\(\)]',
            'assign': r'\bassign\s+',
            'if_statement': r'\bif\s*\(',
            'case_statement': r'\bcase\s*\(',
            'for_loop': r'\bfor\s*\(',
            'function': r'\bfunction\s+',
            'task': r'\btask\s+',
            'signal': r'(?:wire|reg|logic)\s+(?:\[[^\]]+\])?\s*(\w+)',
            'clock': r'\b(?:clk|clock)\b',
            'reset': r'\b(?:rst|reset)\b',
            'memory': r'\b(?:ram|rom|memory|mem)\b',
            'fsm': r'\b(?:state|fsm|STATE|FSM)\b'
        }

        self.compiled_patterns = {k: re.compile(v, re.IGNORECASE)
                                  for k, v in self.patterns.items()}

    def parse_rtl_content(self, content: str) -> Dict[str, Any]:
        """Parse RTL content and extract features"""
        features = {
            'lines_of_code': len(content.splitlines()),
            'module_count': 0,
            'signal_count': 0,
            'always_blocks': 0,
            'assign_statements': 0,
            'if_statements': 0,
            'case_statements': 0,
            'for_loops': 0,
            'function_count': 0,
            'task_count': 0,
            'clock_signals': 0,
            'reset_signals': 0,
            'has_memory': False,
            'has_fsm': False,
            'complexity_score': 0.0
        }

        try:
            # Count occurrences of each construct
            features['module_count'] = len(self.compiled_patterns['module'].findall(content))
            features['always_blocks'] = len(self.compiled_patterns['always_block'].findall(content))
            features['assign_statements'] = len(self.compiled_patterns['assign'].findall(content))
            features['if_statements'] = len(self.compiled_patterns['if_statement'].findall(content))
            features['case_statements'] = len(self.compiled_patterns['case_statement'].findall(content))
            features['for_loops'] = len(self.compiled_patterns['for_loop'].findall(content))
            features['function_count'] = len(self.compiled_patterns['function'].findall(content))
            features['task_count'] = len(self.compiled_patterns['task'].findall(content))

            # Extract signal names (unique declarations only)
            signals = self.compiled_patterns['signal'].findall(content)
            features['signal_count'] = len(set(signals))

            # Check for specific features
            features['clock_signals'] = len(self.compiled_patterns['clock'].findall(content))
            features['reset_signals'] = len(self.compiled_patterns['reset'].findall(content))
            features['has_memory'] = bool(self.compiled_patterns['memory'].search(content))
            features['has_fsm'] = bool(self.compiled_patterns['fsm'].search(content))

            # Calculate complexity score
            features['complexity_score'] = self._calculate_complexity(features)

        except Exception as e:
            print(f"Warning: RTL parsing error: {e}")

        return features

    def _calculate_complexity(self, features: Dict[str, Any]) -> float:
        """Calculate a weighted design complexity score"""
        weights = {
            'lines_of_code': 0.0001,
            'module_count': 0.5,
            'always_blocks': 0.3,
            'if_statements': 0.1,
            'case_statements': 0.2,
            'for_loops': 0.3,
            'function_count': 0.2,
            'task_count': 0.2,
            'has_memory': 1.0,
            'has_fsm': 0.8
        }

        complexity = 0.0
        for feature, weight in weights.items():
            if feature in features:
                value = features[feature]
                if isinstance(value, bool):
                    value = int(value)  # booleans contribute their weight once
                complexity += value * weight

        return round(complexity, 2)

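# Illustrative usage sketch (hypothetical example, not from the original
# file): feed RTLParser a small Verilog module and inspect the features.
def _demo_rtl_parser() -> None:
    sample = (
        "module counter (input clk, input rst, output reg [7:0] count);\n"
        "  always @(posedge clk) begin\n"
        "    if (rst) count <= 8'd0;\n"
        "    else count <= count + 1;\n"
        "  end\n"
        "endmodule\n"
    )
    features = RTLParser().parse_rtl_content(sample)
    # For this snippet: module_count == 1, always_blocks == 1,
    # if_statements == 1, and clock/reset references are detected.
    print(features)
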
class DataPreprocessor:
    """Preprocess data for ML training"""

    def __init__(self):
        self.feature_columns = [
            'lines_of_code', 'module_count', 'signal_count', 'always_blocks',
            'assign_statements', 'if_statements', 'case_statements', 'for_loops',
            'function_count', 'task_count', 'clock_domains', 'reset_signals',
            'interface_signals', 'memory_instances', 'fsm_count', 'pipeline_stages',
            'arithmetic_units', 'complexity_score', 'has_memory', 'has_fsm',
            'has_pipeline', 'has_floating_point', 'is_complex', 'is_large'
        ]

    def preprocess_for_ml(self, df: pd.DataFrame) -> pd.DataFrame:
        """Preprocess DataFrame for ML training"""
        processed_df = df.copy()

        # Fill missing values: booleans default to False, numerics to the column median
        for col in self.feature_columns:
            if col in processed_df.columns:
                if processed_df[col].dtype == 'bool':
                    processed_df[col] = processed_df[col].fillna(False)
                else:
                    processed_df[col] = processed_df[col].fillna(processed_df[col].median())

        # Convert boolean columns to int
        bool_columns = processed_df.select_dtypes(include=['bool']).columns
        processed_df[bool_columns] = processed_df[bool_columns].astype(int)

        # Remove outliers
        processed_df = self._remove_outliers(processed_df)

        return processed_df

    def _remove_outliers(self, df: pd.DataFrame, threshold: float = 3.0) -> pd.DataFrame:
        """Remove outliers using the Z-score method"""
        numeric_columns = df.select_dtypes(include=[np.number]).columns

        for col in numeric_columns:
            std = df[col].std()
            if std == 0 or pd.isna(std):
                # A constant (or single-row) column yields std == 0 or NaN;
                # dividing by it would incorrectly drop every row.
                continue
            z_scores = np.abs((df[col] - df[col].mean()) / std)
            df = df[z_scores < threshold]

        return df

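# Illustrative usage sketch (hypothetical data, not from the original file):
# a tiny frame with missing values and a boolean feature column.
def _demo_preprocessor() -> None:
    df = pd.DataFrame({
        'lines_of_code': [120.0, 4500.0, np.nan, 300.0],
        'module_count': [1.0, 6.0, 2.0, np.nan],
        'has_memory': [False, True, True, False],
    })
    processed = DataPreprocessor().preprocess_for_ml(df)
    # NaNs are median-filled, booleans become 0/1, and rows whose z-score
    # reaches 3.0 in any numeric column are dropped.
    print(processed)
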
def create_risk_dashboard(analysis_results: Dict[str, Any]) -> go.Figure:
    """Create risk assessment dashboard visualization"""

    # Extract data
    risk_score = analysis_results.get('risk_score', 0)
    ml_analysis = analysis_results.get('ml_analysis', {})
    bug_probability = ml_analysis.get('bug_probability', 0) if isinstance(ml_analysis, dict) else 0
    complexity = analysis_results.get('complexity_score', analysis_results.get('complexity_estimate', 0))

    # Create a 2x2 grid: two gauges on top, bar chart and scatter below
    fig = make_subplots(
        rows=2, cols=2,
        specs=[[{"type": "indicator"}, {"type": "indicator"}],
               [{"type": "bar"}, {"type": "scatter"}]],
        subplot_titles=("Overall Risk Score", "Bug Probability",
                        "Risk Factors", "Complexity vs Risk")
    )

    # Risk score gauge
    fig.add_trace(
        go.Indicator(
            mode="gauge+number+delta",
            value=risk_score * 100,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Risk %"},
            gauge={
                'axis': {'range': [None, 100]},
                'bar': {'color': "darkblue"},
                'steps': [
                    {'range': [0, 40], 'color': "lightgray"},
                    {'range': [40, 70], 'color': "yellow"},
                    {'range': [70, 100], 'color': "red"}
                ],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 90
                }
            }
        ),
        row=1, col=1
    )

    # Bug probability gauge
    fig.add_trace(
        go.Indicator(
            mode="gauge+number",
            value=bug_probability * 100,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Bug Probability %"},
            gauge={
                'axis': {'range': [None, 100]},
                'bar': {'color': "darkred"},
                'steps': [
                    {'range': [0, 30], 'color': "green"},
                    {'range': [30, 60], 'color': "yellow"},
                    {'range': [60, 100], 'color': "red"}
                ]
            }
        ),
        row=1, col=2
    )

    # Risk factors bar chart (each factor normalized to [0, 1])
    risk_factors = {
        'Complexity': min(1.0, complexity / 10),
        'Size': min(1.0, analysis_results.get('total_lines', 1000) / 20000),
        'ML Prediction': bug_probability,
        'Features': (int(analysis_results.get('has_memory', False)) +
                     int(analysis_results.get('has_fsm', False))) * 0.5
    }

    fig.add_trace(
        go.Bar(
            x=list(risk_factors.keys()),
            y=list(risk_factors.values()),
            marker_color=['blue', 'green', 'red', 'orange'],
            name="Risk Factors"
        ),
        row=2, col=1
    )

    # Complexity vs Risk scatter
    fig.add_trace(
        go.Scatter(
            x=[complexity],
            y=[risk_score],
            mode='markers',
            marker=dict(size=20, color='red', symbol='diamond'),
            name="Current Design",
            text=[f"Risk: {risk_score:.2f}<br>Complexity: {complexity:.2f}"],
            hovertemplate="%{text}<extra></extra>"
        ),
        row=2, col=2
    )

    # Add a synthetic reference cloud for visual context (random jitter
    # around a linear complexity-risk trend; regenerated on every call)
    ref_complexities = np.linspace(1, 10, 20)
    ref_risks = 0.1 + 0.7 * (ref_complexities / 10) + np.random.normal(0, 0.05, 20)
    ref_risks = np.clip(ref_risks, 0, 1)

    fig.add_trace(
        go.Scatter(
            x=ref_complexities,
            y=ref_risks,
            mode='markers',
            marker=dict(size=8, color='lightblue', opacity=0.6),
            name="Reference Designs",
            hovertemplate="Complexity: %{x:.1f}<br>Risk: %{y:.2f}<extra></extra>"
        ),
        row=2, col=2
    )

    # Update layout
    fig.update_layout(
        title_text="Chip Design Risk Assessment Dashboard",
        title_x=0.5,
        showlegend=True,
        height=600
    )

    fig.update_xaxes(title_text="Risk Factor", row=2, col=1)
    fig.update_yaxes(title_text="Risk Level", row=2, col=1)
    fig.update_xaxes(title_text="Complexity Score", row=2, col=2)
    fig.update_yaxes(title_text="Risk Score", row=2, col=2)

    return fig

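# Illustrative usage sketch (invented values; the keys mirror the .get()
# lookups in create_risk_dashboard above).
def _demo_risk_dashboard() -> None:
    results = {
        'risk_score': 0.62,
        'ml_analysis': {'bug_probability': 0.45},
        'complexity_score': 6.8,
        'total_lines': 12000,
        'has_memory': True,
        'has_fsm': False,
    }
    fig = create_risk_dashboard(results)
    fig.write_html("risk_dashboard.html")  # or fig.show() in a notebook
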
def create_coverage_plot(coverage_data: Dict[str, Any]) -> go.Figure:
    """Create coverage analysis visualization"""

    coverage_types = ['Line', 'Branch', 'Toggle', 'Functional', 'Assertion']
    coverage_values = [
        coverage_data.get('line_coverage', 80),
        coverage_data.get('branch_coverage', 75),
        coverage_data.get('toggle_coverage', 70),
        coverage_data.get('functional_coverage', 85),
        coverage_data.get('assertion_coverage', 78)
    ]

    # Create radar chart for coverage
    fig = go.Figure()

    fig.add_trace(go.Scatterpolar(
        r=coverage_values,
        theta=coverage_types,
        fill='toself',
        name='Current Coverage',
        line_color='blue'
    ))

    # Add target coverage
    target_coverage = [95, 90, 85, 95, 90]
    fig.add_trace(go.Scatterpolar(
        r=target_coverage,
        theta=coverage_types,
        fill=None,
        name='Target Coverage',
        line_color='red',
        line_dash='dash'
    ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100]
            )
        ),
        showlegend=True,
        title="Coverage Analysis"
    )

    return fig

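# Illustrative usage sketch (invented percentages): any key omitted here
# falls back to the defaults hard-coded in create_coverage_plot.
def _demo_coverage_plot() -> None:
    fig = create_coverage_plot({
        'line_coverage': 92,
        'branch_coverage': 84,
        'functional_coverage': 88,
    })
    fig.show()
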
def calculate_verification_metrics(test_results: Dict[str, Any]) -> Dict[str, float]:
    """Calculate verification quality metrics"""

    metrics = {
        'test_efficiency': 0.0,
        'bug_detection_rate': 0.0,
        'coverage_completeness': 0.0,
        'verification_quality_score': 0.0
    }

    try:
        # Test efficiency: coverage achieved per time unit
        coverage = test_results.get('coverage_achieved', 80)
        time_spent = test_results.get('verification_time_hours', 10)
        metrics['test_efficiency'] = coverage / max(1, time_spent)

        # Bug detection rate: bugs found per 100 tests
        bugs_found = test_results.get('bugs_found', 0)
        total_tests = test_results.get('total_tests', 1)
        metrics['bug_detection_rate'] = bugs_found / max(1, total_tests) * 100

        # Coverage completeness: mean of the three coverage metrics
        coverage_types = ['line_coverage', 'branch_coverage', 'functional_coverage']
        coverage_scores = [test_results.get(ct, 0) for ct in coverage_types]
        metrics['coverage_completeness'] = sum(coverage_scores) / len(coverage_scores)

        # Overall verification quality score (weighted blend; note that a
        # lower bug detection rate raises the score here)
        metrics['verification_quality_score'] = (
            metrics['test_efficiency'] * 0.3 +
            metrics['coverage_completeness'] * 0.5 +
            (100 - metrics['bug_detection_rate']) * 0.2
        )

    except Exception as e:
        print(f"Error calculating metrics: {e}")

    return metrics
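
# Illustrative smoke test (hypothetical numbers): run the metric calculation
# end to end when the module is executed directly.
if __name__ == "__main__":
    sample_results = {
        'coverage_achieved': 87,
        'verification_time_hours': 12,
        'bugs_found': 4,
        'total_tests': 250,
        'line_coverage': 90,
        'branch_coverage': 82,
        'functional_coverage': 88,
    }
    print(calculate_verification_metrics(sample_results))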