#!/usr/bin/env python3
"""
Utility functions for ChipVerifyAI
RTL parsing, metrics calculation, and visualization helpers
"""
import re
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
class RTLParser:
"""Parse RTL files to extract design features"""
def __init__(self):
# Regex patterns for RTL parsing
self.patterns = {
'module': r'\bmodule\s+(\w+)',
            'always_block': r'\balways(?:_ff|_comb|_latch)?\b',
'assign': r'\bassign\s+',
'if_statement': r'\bif\s*\(',
'case_statement': r'\bcase\s*\(',
'for_loop': r'\bfor\s*\(',
'function': r'\bfunction\s+',
'task': r'\btask\s+',
'signal': r'(?:wire|reg|logic)\s+(?:\[[^\]]+\])?\s*(\w+)',
'clock': r'\b(?:clk|clock)\b',
'reset': r'\b(?:rst|reset)\b',
'memory': r'\b(?:ram|rom|memory|mem)\b',
            'fsm': r'\b(?:state|fsm)\b'
}
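        # NOTE: these are lightweight text heuristics, not a full Verilog/SystemVerilog
        # parser, so the counts derived from them are approximate by design.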
self.compiled_patterns = {k: re.compile(v, re.IGNORECASE)
for k, v in self.patterns.items()}
def parse_rtl_content(self, content: str) -> Dict[str, Any]:
"""Parse RTL content and extract features"""
features = {
'lines_of_code': len(content.splitlines()),
'module_count': 0,
'signal_count': 0,
'always_blocks': 0,
'assign_statements': 0,
'if_statements': 0,
'case_statements': 0,
'for_loops': 0,
'function_count': 0,
'task_count': 0,
'clock_signals': 0,
'reset_signals': 0,
'has_memory': False,
'has_fsm': False,
'complexity_score': 0.0
}
try:
# Count occurrences
features['module_count'] = len(self.compiled_patterns['module'].findall(content))
features['always_blocks'] = len(self.compiled_patterns['always_block'].findall(content))
features['assign_statements'] = len(self.compiled_patterns['assign'].findall(content))
features['if_statements'] = len(self.compiled_patterns['if_statement'].findall(content))
features['case_statements'] = len(self.compiled_patterns['case_statement'].findall(content))
features['for_loops'] = len(self.compiled_patterns['for_loop'].findall(content))
features['function_count'] = len(self.compiled_patterns['function'].findall(content))
features['task_count'] = len(self.compiled_patterns['task'].findall(content))
# Extract signal names
signals = self.compiled_patterns['signal'].findall(content)
features['signal_count'] = len(set(signals)) # Unique signals
# Check for specific features
features['clock_signals'] = len(self.compiled_patterns['clock'].findall(content))
features['reset_signals'] = len(self.compiled_patterns['reset'].findall(content))
features['has_memory'] = bool(self.compiled_patterns['memory'].search(content))
features['has_fsm'] = bool(self.compiled_patterns['fsm'].search(content))
# Calculate complexity score
features['complexity_score'] = self._calculate_complexity(features)
except Exception as e:
print(f"Warning: RTL parsing error: {e}")
return features
def _calculate_complexity(self, features: Dict[str, Any]) -> float:
"""Calculate design complexity score"""
# Weighted complexity calculation
weights = {
'lines_of_code': 0.0001,
'module_count': 0.5,
'always_blocks': 0.3,
'if_statements': 0.1,
'case_statements': 0.2,
'for_loops': 0.3,
'function_count': 0.2,
'task_count': 0.2,
'has_memory': 1.0,
'has_fsm': 0.8
}
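        # Illustrative worked example (all counts hypothetical): 2 modules, 3 always
        # blocks, 500 lines, and a memory score 2*0.5 + 3*0.3 + 500*0.0001 + 1.0 = 2.95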
complexity = 0.0
for feature, weight in weights.items():
if feature in features:
value = features[feature]
if isinstance(value, bool):
value = int(value)
complexity += value * weight
return round(complexity, 2)
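
# Minimal usage sketch for RTLParser; the Verilog snippet below is a made-up example,
# not taken from any real design:
#
#     parser = RTLParser()
#     features = parser.parse_rtl_content(
#         "module counter(input clk, input rst, output reg [3:0] count);\n"
#         "always @(posedge clk) if (rst) count <= 0; else count <= count + 1;\n"
#         "endmodule"
#     )
#     # features['module_count'] == 1, features['always_blocks'] == 1, and the
#     # 'clk'/'rst' names bump clock_signals/reset_signals.
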
class DataPreprocessor:
"""Preprocess data for ML training"""
def __init__(self):
self.feature_columns = [
'lines_of_code', 'module_count', 'signal_count', 'always_blocks',
'assign_statements', 'if_statements', 'case_statements', 'for_loops',
'function_count', 'task_count', 'clock_domains', 'reset_signals',
'interface_signals', 'memory_instances', 'fsm_count', 'pipeline_stages',
'arithmetic_units', 'complexity_score', 'has_memory', 'has_fsm',
'has_pipeline', 'has_floating_point', 'is_complex', 'is_large'
]
def preprocess_for_ml(self, df: pd.DataFrame) -> pd.DataFrame:
"""Preprocess DataFrame for ML training"""
processed_df = df.copy()
# Fill missing values
for col in self.feature_columns:
if col in processed_df.columns:
if processed_df[col].dtype == 'bool':
processed_df[col] = processed_df[col].fillna(False)
                else:
                    # Fall back to 0 if the column is entirely NaN (median would be NaN)
                    median = processed_df[col].median()
                    processed_df[col] = processed_df[col].fillna(0 if pd.isna(median) else median)
# Convert boolean columns to int
bool_columns = processed_df.select_dtypes(include=['bool']).columns
processed_df[bool_columns] = processed_df[bool_columns].astype(int)
# Remove outliers
processed_df = self._remove_outliers(processed_df)
return processed_df
    def _remove_outliers(self, df: pd.DataFrame, threshold: float = 3.0) -> pd.DataFrame:
        """Remove outlier rows using the Z-score method (|z| >= threshold)"""
        numeric_columns = df.select_dtypes(include=[np.number]).columns
        for col in numeric_columns:
            std = df[col].std()
            if pd.isna(std) or std == 0:
                continue  # constant or empty column: Z-scores are undefined, keep all rows
            z_scores = np.abs((df[col] - df[col].mean()) / std)
            df = df[z_scores < threshold]
        return df
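
# Minimal usage sketch for DataPreprocessor; the column values are illustrative only,
# and feature columns missing from the DataFrame are simply skipped:
#
#     pre = DataPreprocessor()
#     raw = pd.DataFrame({'lines_of_code': [500, 1200, None], 'has_fsm': [True, False, True]})
#     ml_df = pre.preprocess_for_ml(raw)
#     # NaNs are median-filled, booleans become 0/1, and Z-score outlier rows are dropped
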
def create_risk_dashboard(analysis_results: Dict[str, Any]) -> go.Figure:
"""Create risk assessment dashboard visualization"""
# Extract data
risk_score = analysis_results.get('risk_score', 0)
ml_analysis = analysis_results.get('ml_analysis', {})
bug_probability = ml_analysis.get('bug_probability', 0) if isinstance(ml_analysis, dict) else 0
complexity = analysis_results.get('complexity_score', analysis_results.get('complexity_estimate', 0))
# Create subplots
fig = make_subplots(
rows=2, cols=2,
specs=[[{"type": "indicator"}, {"type": "indicator"}],
[{"type": "bar"}, {"type": "scatter"}]],
subplot_titles=("Overall Risk Score", "Bug Probability",
"Risk Factors", "Complexity vs Risk")
)
# Risk score gauge
fig.add_trace(
go.Indicator(
mode="gauge+number+delta",
value=risk_score * 100,
domain={'x': [0, 1], 'y': [0, 1]},
title={'text': "Risk %"},
gauge={
'axis': {'range': [None, 100]},
'bar': {'color': "darkblue"},
'steps': [
{'range': [0, 40], 'color': "lightgray"},
{'range': [40, 70], 'color': "yellow"},
{'range': [70, 100], 'color': "red"}
],
'threshold': {
'line': {'color': "red", 'width': 4},
'thickness': 0.75,
'value': 90
}
}
),
row=1, col=1
)
# Bug probability gauge
fig.add_trace(
go.Indicator(
mode="gauge+number",
value=bug_probability * 100,
domain={'x': [0, 1], 'y': [0, 1]},
title={'text': "Bug Probability %"},
gauge={
'axis': {'range': [None, 100]},
'bar': {'color': "darkred"},
'steps': [
{'range': [0, 30], 'color': "green"},
{'range': [30, 60], 'color': "yellow"},
{'range': [60, 100], 'color': "red"}
]
}
),
row=1, col=2
)
# Risk factors bar chart
risk_factors = {
'Complexity': min(1.0, complexity / 10),
'Size': min(1.0, analysis_results.get('total_lines', 1000) / 20000),
'ML Prediction': bug_probability,
'Features': (int(analysis_results.get('has_memory', False)) +
int(analysis_results.get('has_fsm', False))) * 0.5
}
fig.add_trace(
go.Bar(
x=list(risk_factors.keys()),
y=list(risk_factors.values()),
marker_color=['blue', 'green', 'red', 'orange'],
name="Risk Factors"
),
row=2, col=1
)
# Complexity vs Risk scatter
fig.add_trace(
go.Scatter(
x=[complexity],
y=[risk_score],
mode='markers',
marker=dict(size=20, color='red', symbol='diamond'),
name="Current Design",
text=[f"Risk: {risk_score:.2f}
Complexity: {complexity:.2f}"],
hovertemplate="%{text}"
),
row=2, col=2
)
    # Add synthetic reference points (seeded so the dashboard is reproducible)
    rng = np.random.default_rng(42)
    ref_complexities = np.linspace(1, 10, 20)
    ref_risks = 0.1 + 0.7 * (ref_complexities / 10) + rng.normal(0, 0.05, 20)
    ref_risks = np.clip(ref_risks, 0, 1)
fig.add_trace(
go.Scatter(
x=ref_complexities,
y=ref_risks,
mode='markers',
marker=dict(size=8, color='lightblue', opacity=0.6),
name="Reference Designs",
hovertemplate="Complexity: %{x:.1f}
Risk: %{y:.2f}"
),
row=2, col=2
)
# Update layout
fig.update_layout(
title_text="Chip Design Risk Assessment Dashboard",
title_x=0.5,
showlegend=True,
height=600
)
fig.update_xaxes(title_text="Risk Factor", row=2, col=1)
fig.update_yaxes(title_text="Risk Level", row=2, col=1)
fig.update_xaxes(title_text="Complexity Score", row=2, col=2)
fig.update_yaxes(title_text="Risk Score", row=2, col=2)
return fig
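
# Minimal usage sketch for the risk dashboard; the analysis_results keys/values below
# are illustrative and "risk_dashboard.html" is just an example output path:
#
#     results = {'risk_score': 0.42, 'complexity_score': 5.1, 'total_lines': 8000,
#                'ml_analysis': {'bug_probability': 0.35}}
#     fig = create_risk_dashboard(results)
#     fig.write_html("risk_dashboard.html")  # or fig.show() in an interactive session
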
def create_coverage_plot(coverage_data: Dict[str, Any]) -> go.Figure:
"""Create coverage analysis visualization"""
coverage_types = ['Line', 'Branch', 'Toggle', 'Functional', 'Assertion']
coverage_values = [
coverage_data.get('line_coverage', 80),
coverage_data.get('branch_coverage', 75),
coverage_data.get('toggle_coverage', 70),
coverage_data.get('functional_coverage', 85),
coverage_data.get('assertion_coverage', 78)
]
# Create radar chart for coverage
fig = go.Figure()
fig.add_trace(go.Scatterpolar(
r=coverage_values,
theta=coverage_types,
fill='toself',
name='Current Coverage',
line_color='blue'
))
# Add target coverage
target_coverage = [95, 90, 85, 95, 90]
fig.add_trace(go.Scatterpolar(
r=target_coverage,
theta=coverage_types,
fill=None,
name='Target Coverage',
line_color='red',
line_dash='dash'
))
fig.update_layout(
polar=dict(
radialaxis=dict(
visible=True,
range=[0, 100]
)
),
showlegend=True,
title="Coverage Analysis"
)
return fig
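
# Minimal usage sketch for the coverage radar chart; percentages are illustrative and
# any missing keys fall back to the defaults inside create_coverage_plot:
#
#     fig = create_coverage_plot({'line_coverage': 92, 'branch_coverage': 84,
#                                 'functional_coverage': 77})
#     fig.show()
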
def calculate_verification_metrics(test_results: Dict[str, Any]) -> Dict[str, float]:
"""Calculate verification quality metrics"""
metrics = {
'test_efficiency': 0.0,
'bug_detection_rate': 0.0,
'coverage_completeness': 0.0,
'verification_quality_score': 0.0
}
try:
# Test efficiency: coverage achieved per time unit
coverage = test_results.get('coverage_achieved', 80)
time_spent = test_results.get('verification_time_hours', 10)
metrics['test_efficiency'] = coverage / max(1, time_spent)
# Bug detection rate
bugs_found = test_results.get('bugs_found', 0)
total_tests = test_results.get('total_tests', 1)
metrics['bug_detection_rate'] = bugs_found / max(1, total_tests) * 100
# Coverage completeness
coverage_types = ['line_coverage', 'branch_coverage', 'functional_coverage']
coverage_scores = [test_results.get(ct, 0) for ct in coverage_types]
metrics['coverage_completeness'] = sum(coverage_scores) / len(coverage_scores)
# Overall verification quality score
metrics['verification_quality_score'] = (
metrics['test_efficiency'] * 0.3 +
metrics['coverage_completeness'] * 0.5 +
(100 - metrics['bug_detection_rate']) * 0.2
)
except Exception as e:
print(f"Error calculating metrics: {e}")
return metrics
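
if __name__ == "__main__":
    # Lightweight smoke test with made-up numbers; not a real verification run.
    sample_results = {
        'coverage_achieved': 88,
        'verification_time_hours': 12,
        'bugs_found': 4,
        'total_tests': 250,
        'line_coverage': 90,
        'branch_coverage': 82,
        'functional_coverage': 86,
    }
    for name, value in calculate_verification_metrics(sample_results).items():
        print(f"{name}: {value:.2f}")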