"""
Modern ML Architecture for POLYMEROS
Implements transformer-based models, multi-task learning, and ensemble methods
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from typing import Dict, List, Tuple, Optional, Union, Any
from dataclasses import dataclass
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
import xgboost as xgb
from scipy import stats
import warnings
import json
from pathlib import Path

@dataclass
class ModelPrediction:
    """Structured prediction output with uncertainty quantification"""

    prediction: Union[int, float, np.ndarray]
    confidence: float
    uncertainty_epistemic: float  # Model uncertainty
    uncertainty_aleatoric: float  # Data uncertainty
    class_probabilities: Optional[np.ndarray] = None
    feature_importance: Optional[Dict[str, float]] = None
    explanation: Optional[str] = None


@dataclass
class MultiTaskTarget:
    """Multi-task learning targets"""

    classification_target: Optional[int] = None  # Polymer type classification
    degradation_level: Optional[float] = None  # Continuous degradation score
    property_predictions: Optional[Dict[str, float]] = None  # Material properties
    aging_rate: Optional[float] = None  # Rate of aging prediction
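
# Illustrative sketch (not part of the original module): constructing the two
# dataclasses above by hand. The numeric values are placeholders chosen only to
# show the expected field types; unset fields fall back to their None defaults.
def _demo_prediction_and_target() -> None:
    pred = ModelPrediction(
        prediction=1,
        confidence=0.92,
        uncertainty_epistemic=0.03,
        uncertainty_aleatoric=0.08,
        class_probabilities=np.array([0.08, 0.92]),
    )
    target = MultiTaskTarget(classification_target=1, degradation_level=0.4)
    print(pred.explanation, target.aging_rate)  # both default to None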

class SpectralTransformerBlock(nn.Module):
    """Transformer block optimized for spectral data"""

    def __init__(self, d_model: int, num_heads: int, d_ff: int, dropout: float = 0.1):
        super().__init__()
        self.d_model = d_model
        self.num_heads = num_heads

        # Multi-head attention
        self.attention = nn.MultiheadAttention(
            d_model, num_heads, dropout=dropout, batch_first=True
        )

        # Feed-forward network
        self.ff_network = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_ff, d_model),
        )

        # Layer normalization
        self.ln1 = nn.LayerNorm(d_model)
        self.ln2 = nn.LayerNorm(d_model)

        # Dropout
        self.dropout = nn.Dropout(dropout)

    def forward(
        self, x: torch.Tensor, mask: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        # Self-attention with residual connection
        attn_output, _ = self.attention(x, x, x, attn_mask=mask)
        x = self.ln1(x + self.dropout(attn_output))

        # Feed-forward with residual connection
        ff_output = self.ff_network(x)
        x = self.ln2(x + self.dropout(ff_output))

        return x
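
# Illustrative sketch (not part of the original module): passing a dummy batch
# of already-embedded spectra through one SpectralTransformerBlock. The shapes
# (batch=4, seq_len=500) are assumptions chosen only for demonstration; the
# block preserves the input shape.
def _demo_transformer_block() -> None:
    block = SpectralTransformerBlock(d_model=256, num_heads=8, d_ff=1024)
    x = torch.randn(4, 500, 256)  # (batch, seq_len, d_model)
    out = block(x)
    assert out.shape == x.shape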

class SpectralPositionalEncoding(nn.Module):
    """Positional encoding adapted for spectral wavenumber information"""

    def __init__(self, d_model: int, max_seq_length: int = 2000):
        super().__init__()
        self.d_model = d_model

        # Create positional encoding matrix
        pe = torch.zeros(max_seq_length, d_model)
        position = torch.arange(0, max_seq_length, dtype=torch.float).unsqueeze(1)

        # Use different frequencies for different dimensions
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        self.register_buffer("pe", pe.unsqueeze(0))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        seq_len = x.size(1)
        return x + self.pe[:, :seq_len, :].to(x.device)
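
# Illustrative sketch (not part of the original module): the encoding above is
# the standard sinusoidal scheme, PE[pos, 2i] = sin(pos / 10000^(2i/d_model))
# and PE[pos, 2i+1] = cos(pos / 10000^(2i/d_model)), added element-wise to the
# projected spectrum. Shapes here are assumptions for demonstration only.
def _demo_positional_encoding() -> None:
    pos_enc = SpectralPositionalEncoding(d_model=64, max_seq_length=1000)
    x = torch.zeros(2, 300, 64)  # (batch, seq_len, d_model)
    encoded = pos_enc(x)
    # With a zero input, the output is exactly the positional encoding itself
    assert encoded.shape == (2, 300, 64)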

class SpectralTransformer(nn.Module):
    """Transformer architecture optimized for spectral analysis"""

    def __init__(
        self,
        input_dim: int = 1,
        d_model: int = 256,
        num_heads: int = 8,
        num_layers: int = 6,
        d_ff: int = 1024,
        max_seq_length: int = 2000,
        num_classes: int = 2,
        dropout: float = 0.1,
    ):
        super().__init__()
        self.d_model = d_model
        self.num_classes = num_classes

        # Input projection
        self.input_projection = nn.Linear(input_dim, d_model)

        # Positional encoding
        self.pos_encoding = SpectralPositionalEncoding(d_model, max_seq_length)

        # Transformer layers
        self.transformer_layers = nn.ModuleList(
            [
                SpectralTransformerBlock(d_model, num_heads, d_ff, dropout)
                for _ in range(num_layers)
            ]
        )

        # Classification head
        self.classification_head = nn.Sequential(
            nn.Linear(d_model, d_model // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, num_classes),
        )

        # Regression heads for multi-task learning
        self.degradation_head = nn.Sequential(
            nn.Linear(d_model, d_model // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, 1),
        )
        self.property_head = nn.Sequential(
            nn.Linear(d_model, d_model // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, 5),  # Predict 5 material properties
        )

        # Uncertainty estimation layers
        self.uncertainty_head = nn.Sequential(
            nn.Linear(d_model, d_model // 4),
            nn.ReLU(),
            nn.Linear(d_model // 4, 2),  # Epistemic and aleatoric uncertainty
        )

        # Attention pooling for sequence aggregation
        self.attention_pool = nn.MultiheadAttention(d_model, 1, batch_first=True)
        self.pool_query = nn.Parameter(torch.randn(1, 1, d_model))

        self.dropout = nn.Dropout(dropout)

    def forward(
        self, x: torch.Tensor, return_attention: bool = False
    ) -> Dict[str, torch.Tensor]:
        batch_size, _, _ = x.shape

        # Input projection and positional encoding
        x = self.input_projection(x)  # (batch, seq_len, d_model)
        x = self.pos_encoding(x)
        x = self.dropout(x)

        # Store attention weights if requested
        attention_weights = []

        # Pass through transformer layers
        for layer in self.transformer_layers:
            x = layer(x)

        # Attention pooling to get sequence representation
        query = self.pool_query.expand(batch_size, -1, -1)
        pooled_output, pool_attention = self.attention_pool(query, x, x)
        pooled_output = pooled_output.squeeze(1)  # (batch, d_model)
        if return_attention:
            attention_weights.append(pool_attention)

        # Multi-task outputs
        outputs = {}

        # Classification output
        classification_logits = self.classification_head(pooled_output)
        outputs["classification_logits"] = classification_logits
        outputs["classification_probs"] = F.softmax(classification_logits, dim=-1)

        # Degradation prediction
        degradation_pred = self.degradation_head(pooled_output)
        outputs["degradation_prediction"] = degradation_pred

        # Property predictions
        property_pred = self.property_head(pooled_output)
        outputs["property_predictions"] = property_pred

        # Uncertainty estimation (softplus keeps both estimates positive)
        uncertainty_pred = self.uncertainty_head(pooled_output)
        outputs["uncertainty_epistemic"] = F.softplus(uncertainty_pred[:, 0])
        outputs["uncertainty_aleatoric"] = F.softplus(uncertainty_pred[:, 1])

        if return_attention:
            outputs["attention_weights"] = attention_weights

        return outputs
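
# Illustrative sketch (not part of the original module): a forward pass through
# SpectralTransformer on a random "spectrum". The sequence length (500 points)
# and the reduced model size are assumptions for demonstration; the dict keys
# shown are the ones produced by forward() above.
def _demo_spectral_transformer() -> None:
    model = SpectralTransformer(
        input_dim=1, d_model=64, num_heads=4, num_layers=2, d_ff=128
    )
    spectrum = torch.randn(2, 500, 1)  # (batch, seq_len, input_dim)
    outputs = model(spectrum)
    print(outputs["classification_probs"].shape)    # (2, num_classes)
    print(outputs["degradation_prediction"].shape)  # (2, 1)
    print(outputs["property_predictions"].shape)    # (2, 5)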

class BayesianUncertaintyEstimator:
    """Bayesian uncertainty quantification using Monte Carlo dropout"""

    def __init__(self, model: nn.Module, num_samples: int = 100):
        self.model = model
        self.num_samples = num_samples

    def enable_dropout(self, model: nn.Module):
        """Enable dropout for uncertainty estimation"""
        for module in model.modules():
            if isinstance(module, nn.Dropout):
                module.train()

    def predict_with_uncertainty(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
        """
        Predict with uncertainty quantification using Monte Carlo dropout

        Args:
            x: Input tensor

        Returns:
            Predictions with uncertainty estimates
        """
        self.model.eval()
        self.enable_dropout(self.model)

        classification_probs = []
        degradation_preds = []
        uncertainty_estimates = []

        with torch.no_grad():
            for _ in range(self.num_samples):
                output = self.model(x)
                classification_probs.append(output["classification_probs"])
                degradation_preds.append(output["degradation_prediction"])
                uncertainty_estimates.append(
                    torch.stack(
                        [
                            output["uncertainty_epistemic"],
                            output["uncertainty_aleatoric"],
                        ],
                        dim=1,
                    )
                )

        # Stack predictions
        classification_stack = torch.stack(
            classification_probs, dim=0
        )  # (num_samples, batch, classes)
        degradation_stack = torch.stack(degradation_preds, dim=0)
        uncertainty_stack = torch.stack(uncertainty_estimates, dim=0)

        # Calculate statistics
        mean_classification = classification_stack.mean(dim=0)
        std_classification = classification_stack.std(dim=0)
        mean_degradation = degradation_stack.mean(dim=0)
        std_degradation = degradation_stack.std(dim=0)
        mean_uncertainty = uncertainty_stack.mean(dim=0)

        # Epistemic uncertainty (model uncertainty) from disagreement across MC samples
        epistemic_uncertainty = std_classification.mean(dim=1)

        # Aleatoric uncertainty (data uncertainty) from the learned uncertainty head
        aleatoric_uncertainty = mean_uncertainty[:, 1]

        return {
            "mean_classification": mean_classification,
            "std_classification": std_classification,
            "mean_degradation": mean_degradation,
            "std_degradation": std_degradation,
            "epistemic_uncertainty": epistemic_uncertainty,
            "aleatoric_uncertainty": aleatoric_uncertainty,
            "total_uncertainty": epistemic_uncertainty + aleatoric_uncertainty,
        }
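
# Illustrative sketch (not part of the original module): Monte Carlo dropout in
# action. Running the same input through the model several times with dropout
# left active yields a distribution of predictions; its spread is the epistemic
# uncertainty reported above. The tiny model and 20 samples are assumptions.
def _demo_mc_dropout() -> None:
    model = SpectralTransformer(d_model=64, num_heads=4, num_layers=2, d_ff=128)
    estimator = BayesianUncertaintyEstimator(model, num_samples=20)
    spectrum = torch.randn(1, 500, 1)
    stats_out = estimator.predict_with_uncertainty(spectrum)
    print(
        float(stats_out["epistemic_uncertainty"][0]),
        float(stats_out["aleatoric_uncertainty"][0]),
    )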

class EnsembleModel:
    """Ensemble model combining multiple approaches"""

    def __init__(self):
        self.models = {}
        self.weights = {}
        self.is_fitted = False

    def add_transformer_model(self, model: SpectralTransformer, weight: float = 1.0):
        """Add transformer model to ensemble"""
        self.models["transformer"] = model
        self.weights["transformer"] = weight

    def add_random_forest(self, n_estimators: int = 100, weight: float = 1.0):
        """Add Random Forest to ensemble"""
        self.models["random_forest_clf"] = RandomForestClassifier(
            n_estimators=n_estimators, random_state=42, oob_score=True
        )
        self.models["random_forest_reg"] = RandomForestRegressor(
            n_estimators=n_estimators, random_state=42, oob_score=True
        )
        self.weights["random_forest"] = weight

    def add_xgboost(self, weight: float = 1.0):
        """Add XGBoost to ensemble"""
        self.models["xgboost_clf"] = xgb.XGBClassifier(
            n_estimators=100, random_state=42, eval_metric="logloss"
        )
        self.models["xgboost_reg"] = xgb.XGBRegressor(n_estimators=100, random_state=42)
        self.weights["xgboost"] = weight

    def fit(
        self,
        X: np.ndarray,
        y_classification: np.ndarray,
        y_degradation: Optional[np.ndarray] = None,
    ):
        """
        Fit ensemble models

        Args:
            X: Input features (flattened spectra for traditional ML models)
            y_classification: Classification targets
            y_degradation: Degradation targets (optional)
        """
        # Fit Random Forest
        if "random_forest_clf" in self.models:
            self.models["random_forest_clf"].fit(X, y_classification)
            if y_degradation is not None:
                self.models["random_forest_reg"].fit(X, y_degradation)

        # Fit XGBoost
        if "xgboost_clf" in self.models:
            self.models["xgboost_clf"].fit(X, y_classification)
            if y_degradation is not None:
                self.models["xgboost_reg"].fit(X, y_degradation)

        self.is_fitted = True

    def predict(
        self, X: np.ndarray, X_transformer: Optional[torch.Tensor] = None
    ) -> ModelPrediction:
        """
        Ensemble prediction with uncertainty quantification

        Args:
            X: Input features for traditional ML models
            X_transformer: Input tensor for transformer model

        Returns:
            Ensemble prediction with uncertainty
        """
        if not self.is_fitted and "transformer" not in self.models:
            raise ValueError(
                "Ensemble must be fitted or contain pre-trained transformer"
            )

        classification_probs = []
        degradation_preds = []
        model_weights = []

        # Random Forest predictions
        if (
            "random_forest_clf" in self.models
            and self.models["random_forest_clf"] is not None
        ):
            rf_probs = self.models["random_forest_clf"].predict_proba(X)
            classification_probs.append(rf_probs)
            model_weights.append(self.weights["random_forest"])

            if "random_forest_reg" in self.models:
                rf_degradation = self.models["random_forest_reg"].predict(X)
                degradation_preds.append(rf_degradation)

        # XGBoost predictions
        if "xgboost_clf" in self.models and self.models["xgboost_clf"] is not None:
            xgb_probs = self.models["xgboost_clf"].predict_proba(X)
            classification_probs.append(xgb_probs)
            model_weights.append(self.weights["xgboost"])

            if "xgboost_reg" in self.models:
                xgb_degradation = self.models["xgboost_reg"].predict(X)
                degradation_preds.append(xgb_degradation)

        # Transformer predictions
        if "transformer" in self.models and X_transformer is not None:
            transformer_output = self.models["transformer"](X_transformer)
            transformer_probs = (
                transformer_output["classification_probs"].detach().cpu().numpy()
            )
            classification_probs.append(transformer_probs)
            model_weights.append(self.weights["transformer"])

            transformer_degradation = (
                transformer_output["degradation_prediction"].detach().cpu().numpy()
            )
            degradation_preds.append(transformer_degradation.flatten())

        # Weighted ensemble
        if classification_probs:
            model_weights = np.array(model_weights)
            model_weights = model_weights / np.sum(model_weights)  # Normalize

            # Weighted average of probabilities
            ensemble_probs = np.zeros_like(classification_probs[0])
            for i, probs in enumerate(classification_probs):
                ensemble_probs += model_weights[i] * probs

            # Predicted class
            predicted_class = np.argmax(ensemble_probs, axis=1)[0]
            confidence = np.max(ensemble_probs, axis=1)[0]

            # Calculate uncertainty from model disagreement
            prob_variance = np.var(
                [probs[0] for probs in classification_probs], axis=0
            )
            epistemic_uncertainty = np.mean(prob_variance)

            # Aleatoric uncertainty (simple estimate from residual probability mass)
            aleatoric_uncertainty = 1.0 - confidence

            # Degradation prediction
            ensemble_degradation = None
            if degradation_preds:
                ensemble_degradation = np.average(
                    degradation_preds, weights=model_weights, axis=0
                )[0]
        else:
            raise ValueError("No valid predictions could be made")

        # Feature importance (from Random Forest if available)
        feature_importance = None
        if (
            "random_forest_clf" in self.models
            and self.models["random_forest_clf"] is not None
        ):
            importance = self.models["random_forest_clf"].feature_importances_
            # Convert to wavenumber-based importance (assuming spectral input)
            feature_importance = {
                f"wavenumber_{i}": float(importance[i]) for i in range(len(importance))
            }

        return ModelPrediction(
            prediction=predicted_class,
            confidence=confidence,
            uncertainty_epistemic=epistemic_uncertainty,
            uncertainty_aleatoric=aleatoric_uncertainty,
            class_probabilities=ensemble_probs[0],
            feature_importance=feature_importance,
            explanation=self._generate_explanation(
                predicted_class, confidence, ensemble_degradation
            ),
        )

    def _generate_explanation(
        self,
        predicted_class: int,
        confidence: float,
        degradation: Optional[float] = None,
    ) -> str:
        """Generate human-readable explanation"""
        class_names = {0: "Stable (Unweathered)", 1: "Weathered"}
        class_name = class_names.get(predicted_class, f"Class {predicted_class}")

        explanation = f"Predicted class: {class_name} (confidence: {confidence:.3f})"
        if degradation is not None:
            explanation += f"\nEstimated degradation level: {degradation:.3f}"

        if confidence > 0.8:
            explanation += "\nHigh confidence prediction - strong spectral evidence"
        elif confidence > 0.6:
            explanation += "\nModerate confidence - some uncertainty in classification"
        else:
            explanation += (
                "\nLow confidence - significant uncertainty, consider additional analysis"
            )

        return explanation
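
# Illustrative sketch (not part of the original module): fitting the classical
# members of the ensemble on synthetic spectra and querying a prediction. The
# data shapes (40 samples x 500 points), labels, and degradation scores are
# fabricated purely to exercise the API above.
def _demo_ensemble() -> None:
    rng = np.random.default_rng(0)
    X = rng.normal(size=(40, 500))          # flattened spectra
    y_cls = rng.integers(0, 2, size=40)     # weathered / unweathered labels
    y_deg = rng.uniform(0.0, 1.0, size=40)  # degradation scores

    ensemble = EnsembleModel()
    ensemble.add_random_forest(n_estimators=50, weight=0.5)
    ensemble.add_xgboost(weight=0.5)
    ensemble.fit(X, y_cls, y_deg)

    result = ensemble.predict(X[:1])
    print(result.prediction, f"{result.confidence:.3f}")
    print(result.explanation)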

class MultiTaskLearningFramework:
    """Framework for multi-task learning in polymer analysis"""

    def __init__(self, model: SpectralTransformer):
        self.model = model
        self.task_weights = {
            "classification": 1.0,
            "degradation": 0.5,
            "properties": 0.3,
        }
        self.optimizer = None
        self.scheduler = None

    def setup_training(self, learning_rate: float = 1e-4):
        """Setup optimizer and scheduler"""
        self.optimizer = torch.optim.AdamW(
            self.model.parameters(), lr=learning_rate, weight_decay=0.01
        )
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, T_max=100
        )

    def compute_loss(
        self,
        outputs: Dict[str, torch.Tensor],
        targets: MultiTaskTarget,
        batch_size: int,
    ) -> Dict[str, torch.Tensor]:
        """
        Compute multi-task loss

        Args:
            outputs: Model outputs
            targets: Multi-task targets
            batch_size: Batch size

        Returns:
            Loss components
        """
        losses = {}
        total_loss = 0

        # Classification loss
        if targets.classification_target is not None:
            classification_loss = F.cross_entropy(
                outputs["classification_logits"],
                torch.tensor(
                    [targets.classification_target] * batch_size, dtype=torch.long
                ),
            )
            losses["classification"] = classification_loss
            total_loss += self.task_weights["classification"] * classification_loss

        # Degradation regression loss
        if targets.degradation_level is not None:
            degradation_loss = F.mse_loss(
                outputs["degradation_prediction"].squeeze(-1),
                torch.tensor(
                    [targets.degradation_level] * batch_size, dtype=torch.float
                ),
            )
            losses["degradation"] = degradation_loss
            total_loss += self.task_weights["degradation"] * degradation_loss

        # Property prediction loss
        if targets.property_predictions is not None:
            property_targets = torch.tensor(
                [[targets.property_predictions.get(f"prop_{i}", 0.0) for i in range(5)]]
                * batch_size,
                dtype=torch.float,
            )
            property_loss = F.mse_loss(
                outputs["property_predictions"], property_targets
            )
            losses["properties"] = property_loss
            total_loss += self.task_weights["properties"] * property_loss

        # Uncertainty regularization
        uncertainty_reg = torch.mean(outputs["uncertainty_epistemic"]) + torch.mean(
            outputs["uncertainty_aleatoric"]
        )
        losses["uncertainty_reg"] = uncertainty_reg
        total_loss += 0.01 * uncertainty_reg  # Small weight for regularization

        losses["total"] = total_loss
        return losses

    def train_step(self, x: torch.Tensor, targets: MultiTaskTarget) -> Dict[str, float]:
        """Single training step"""
        self.model.train()

        if self.optimizer is None:
            raise ValueError(
                "Optimizer is not initialized. Call setup_training() to initialize it."
            )

        self.optimizer.zero_grad()
        outputs = self.model(x)
        losses = self.compute_loss(outputs, targets, x.size(0))

        losses["total"].backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
        self.optimizer.step()

        return {
            k: float(v.item()) if torch.is_tensor(v) else float(v)
            for k, v in losses.items()
        }
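
# Illustrative sketch (not part of the original module): one multi-task
# optimization step on a random batch. The batch of 4 spectra and the target
# values are placeholders; in practice each sample would carry its own targets.
def _demo_train_step() -> None:
    model = SpectralTransformer(d_model=64, num_heads=4, num_layers=2, d_ff=128)
    framework = MultiTaskLearningFramework(model)
    framework.setup_training(learning_rate=1e-4)

    x = torch.randn(4, 500, 1)
    targets = MultiTaskTarget(classification_target=1, degradation_level=0.35)
    losses = framework.train_step(x, targets)
    print({k: round(v, 4) for k, v in losses.items()})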

class ModernMLPipeline:
    """Complete modern ML pipeline for polymer analysis"""

    def __init__(self, config: Optional[Dict] = None):
        self.config = config or self._default_config()
        self.transformer_model = None
        self.ensemble_model = None
        self.uncertainty_estimator = None
        self.multi_task_framework = None

    def _default_config(self) -> Dict:
        """Default configuration"""
        return {
            "transformer": {
                "d_model": 256,
                "num_heads": 8,
                "num_layers": 6,
                "d_ff": 1024,
                "dropout": 0.1,
                "num_classes": 2,
            },
            "ensemble": {
                "transformer_weight": 0.4,
                "random_forest_weight": 0.3,
                "xgboost_weight": 0.3,
            },
            "uncertainty": {"num_mc_samples": 50},
            "training": {"learning_rate": 1e-4, "batch_size": 32, "num_epochs": 100},
        }

    def initialize_models(self, input_dim: int = 1, max_seq_length: int = 2000):
        """Initialize all models"""
        # Transformer model
        self.transformer_model = SpectralTransformer(
            input_dim=input_dim,
            d_model=self.config["transformer"]["d_model"],
            num_heads=self.config["transformer"]["num_heads"],
            num_layers=self.config["transformer"]["num_layers"],
            d_ff=self.config["transformer"]["d_ff"],
            max_seq_length=max_seq_length,
            num_classes=self.config["transformer"]["num_classes"],
            dropout=self.config["transformer"]["dropout"],
        )

        # Uncertainty estimator
        self.uncertainty_estimator = BayesianUncertaintyEstimator(
            self.transformer_model,
            num_samples=self.config["uncertainty"]["num_mc_samples"],
        )

        # Multi-task framework
        self.multi_task_framework = MultiTaskLearningFramework(self.transformer_model)

        # Ensemble model
        self.ensemble_model = EnsembleModel()
        self.ensemble_model.add_transformer_model(
            self.transformer_model, self.config["ensemble"]["transformer_weight"]
        )
        self.ensemble_model.add_random_forest(
            weight=self.config["ensemble"]["random_forest_weight"]
        )
        self.ensemble_model.add_xgboost(
            weight=self.config["ensemble"]["xgboost_weight"]
        )
    def train_ensemble(
        self,
        X_flat: np.ndarray,
        X_transformer: torch.Tensor,
        y_classification: np.ndarray,
        y_degradation: Optional[np.ndarray] = None,
    ):
        """Train the ensemble model"""
        if self.ensemble_model is None:
            raise ValueError("Models not initialized. Call initialize_models() first.")

        # Train traditional ML models
        self.ensemble_model.fit(X_flat, y_classification, y_degradation)

        # Setup transformer training
        if self.multi_task_framework is None:
            raise ValueError(
                "Multi-task framework is not initialized. Call initialize_models() first."
            )
        self.multi_task_framework.setup_training(
            self.config["training"]["learning_rate"]
        )

        print(
            "Ensemble training completed (transformer training would require full training loop)"
        )
    def predict_with_all_methods(
        self, X_flat: np.ndarray, X_transformer: torch.Tensor
    ) -> Dict[str, Any]:
        """
        Comprehensive prediction using all methods

        Args:
            X_flat: Flattened spectral data for traditional ML
            X_transformer: Tensor format for transformer

        Returns:
            Complete prediction results
        """
        results = {}

        # Ensemble prediction
        if self.ensemble_model is None:
            raise ValueError(
                "Ensemble model is not initialized. Call initialize_models() first."
            )
        ensemble_pred = self.ensemble_model.predict(X_flat, X_transformer)
        results["ensemble"] = ensemble_pred

        # Transformer with uncertainty
        if self.transformer_model is not None:
            if self.uncertainty_estimator is None:
                raise ValueError(
                    "Uncertainty estimator is not initialized. Call initialize_models() first."
                )
            uncertainty_pred = self.uncertainty_estimator.predict_with_uncertainty(
                X_transformer
            )
            results["transformer_uncertainty"] = uncertainty_pred

        # Individual model predictions for comparison
        individual_predictions = {}
        if "random_forest_clf" in self.ensemble_model.models:
            rf_pred = self.ensemble_model.models["random_forest_clf"].predict_proba(
                X_flat
            )[0]
            individual_predictions["random_forest"] = rf_pred
        if "xgboost_clf" in self.ensemble_model.models:
            xgb_pred = self.ensemble_model.models["xgboost_clf"].predict_proba(X_flat)[0]
            individual_predictions["xgboost"] = xgb_pred

        results["individual_models"] = individual_predictions
        return results
    def get_model_insights(
        self, X_flat: np.ndarray, X_transformer: torch.Tensor
    ) -> Dict[str, Any]:
        """
        Generate insights about model behavior and predictions

        Args:
            X_flat: Flattened spectral data
            X_transformer: Transformer input format

        Returns:
            Model insights and explanations
        """
        insights = {}

        # Feature importance from Random Forest
        if (
            self.ensemble_model is not None
            and "random_forest_clf" in self.ensemble_model.models
            and self.ensemble_model.models["random_forest_clf"] is not None
        ):
            rf_importance = self.ensemble_model.models[
                "random_forest_clf"
            ].feature_importances_
            top_features = np.argsort(rf_importance)[-10:][::-1]
            insights["top_spectral_regions"] = {
                f"wavenumber_{idx}": float(rf_importance[idx]) for idx in top_features
            }

        # Attention weights from transformer
        if self.transformer_model is not None:
            self.transformer_model.eval()
            with torch.no_grad():
                outputs = self.transformer_model(X_transformer, return_attention=True)
                if "attention_weights" in outputs:
                    insights["attention_patterns"] = outputs["attention_weights"]

        # Uncertainty analysis
        predictions = self.predict_with_all_methods(X_flat, X_transformer)
        if "transformer_uncertainty" in predictions:
            uncertainty_data = predictions["transformer_uncertainty"]
            insights["uncertainty_analysis"] = {
                "epistemic_uncertainty": float(
                    uncertainty_data["epistemic_uncertainty"].mean()
                ),
                "aleatoric_uncertainty": float(
                    uncertainty_data["aleatoric_uncertainty"].mean()
                ),
                "total_uncertainty": float(
                    uncertainty_data["total_uncertainty"].mean()
                ),
                "confidence_level": (
                    "high"
                    if uncertainty_data["total_uncertainty"].mean() < 0.1
                    else (
                        "medium"
                        if uncertainty_data["total_uncertainty"].mean() < 0.3
                        else "low"
                    )
                ),
            }

        # Model agreement analysis
        if "individual_models" in predictions:
            individual = predictions["individual_models"]
            agreements = []
            for model1_name, model1_pred in individual.items():
                for model2_name, model2_pred in individual.items():
                    if model1_name != model2_name:
                        # Agreement based on prediction similarity
                        agreement = 1.0 - np.abs(model1_pred - model2_pred).mean()
                        agreements.append(agreement)

            insights["model_agreement"] = {
                "average_agreement": float(np.mean(agreements)) if agreements else 0.0,
                "agreement_level": (
                    "high"
                    if agreements and np.mean(agreements) > 0.8
                    else "medium" if agreements and np.mean(agreements) > 0.6 else "low"
                ),
            }

        return insights
    def save_models(self, save_path: Path):
        """Save trained models"""
        save_path = Path(save_path)
        save_path.mkdir(parents=True, exist_ok=True)

        # Save transformer model
        if self.transformer_model is not None:
            torch.save(
                self.transformer_model.state_dict(), save_path / "transformer_model.pth"
            )

        # Save configuration
        with open(save_path / "config.json", "w") as f:
            json.dump(self.config, f, indent=2)

        print(f"Models saved to {save_path}")

    def load_models(self, load_path: Path):
        """Load pre-trained models"""
        load_path = Path(load_path)

        # Load configuration
        with open(load_path / "config.json", "r") as f:
            self.config = json.load(f)

        # Initialize and load transformer
        self.initialize_models()
        if (
            self.transformer_model is not None
            and (load_path / "transformer_model.pth").exists()
        ):
            self.transformer_model.load_state_dict(
                torch.load(load_path / "transformer_model.pth", map_location="cpu")
            )
        else:
            raise ValueError(
                "Transformer model is not initialized or model file is missing."
            )

        print(f"Models loaded from {load_path}")

# Utility functions for data preparation


def prepare_transformer_input(
    spectral_data: np.ndarray, max_length: int = 2000
) -> torch.Tensor:
    """
    Prepare spectral data for transformer input

    Args:
        spectral_data: Raw spectral intensities (1D array)
        max_length: Maximum sequence length

    Returns:
        Formatted tensor for transformer
    """
    # Ensure proper length
    if len(spectral_data) > max_length:
        # Downsample
        indices = np.linspace(0, len(spectral_data) - 1, max_length, dtype=int)
        spectral_data = spectral_data[indices]
    elif len(spectral_data) < max_length:
        # Pad with zeros
        padding = np.zeros(max_length - len(spectral_data))
        spectral_data = np.concatenate([spectral_data, padding])

    # Reshape for transformer: (batch_size, sequence_length, features)
    return torch.tensor(spectral_data, dtype=torch.float32).unsqueeze(0).unsqueeze(-1)
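
# Illustrative sketch (not part of the original module): resampling behaviour of
# prepare_transformer_input. The 2500-point spectrum is a fabricated example; it
# is downsampled to max_length, while a shorter one would be zero-padded.
def _demo_prepare_input() -> None:
    spectrum = np.random.rand(2500)
    tensor = prepare_transformer_input(spectrum, max_length=2000)
    assert tensor.shape == (1, 2000, 1)  # (batch, seq_len, features)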

def create_multitask_targets(
    classification_label: int,
    degradation_score: Optional[float] = None,
    material_properties: Optional[Dict[str, float]] = None,
) -> MultiTaskTarget:
    """
    Create multi-task learning targets

    Args:
        classification_label: Classification target (0 or 1)
        degradation_score: Continuous degradation score [0, 1]
        material_properties: Dictionary of material properties

    Returns:
        MultiTaskTarget object
    """
    return MultiTaskTarget(
        classification_target=classification_label,
        degradation_level=degradation_score,
        property_predictions=material_properties,
    )
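
# Illustrative sketch (not part of the original module): building targets for a
# weathered sample. The property keys follow the "prop_0" .. "prop_4" naming
# that compute_loss() expects; the values themselves are placeholders.
def _demo_create_targets() -> None:
    targets = create_multitask_targets(
        classification_label=1,
        degradation_score=0.65,
        material_properties={f"prop_{i}": 0.1 * i for i in range(5)},
    )
    print(targets.classification_target, targets.degradation_level)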