| import torch | |
| import torch.nn as nn | |
| import timm | |
| from huggingface_hub import PyTorchModelHubMixin | |
class KeypointModel(nn.Module, PyTorchModelHubMixin):
    """Single-channel heatmap predictor built on a timm ConvNeXt V2 backbone.

    The network is the timm model with its last two child modules removed,
    followed by a small convolutional head that resizes the feature map to
    the configured heatmap size and projects it to one output channel.
    """

    def __init__(self, config, **kwargs):
        """Build the feature extractor and the heatmap head.

        Args:
            config: Settings holder providing ``heatmap_size``, the value
                handed to ``nn.Upsample(size=...)``. It may be an object
                exposing ``heatmap_size`` as an attribute, or a ``dict``
                holding it under the ``"heatmap_size"`` key.
            **kwargs: Accepted for call compatibility; not used here.

        Raises:
            KeyError: If ``config`` is a dict without a ``heatmap_size`` key.
            AttributeError: If ``config`` is any other object without a
                ``heatmap_size`` attribute.
        """
        super().__init__()

        # A config decoded from JSON is a plain dict, so attribute access
        # alone would fail on it; support both spellings.
        # NOTE(review): PyTorchModelHubMixin.from_pretrained appears to inject
        # the saved config.json as a dict -- confirm for the installed
        # huggingface_hub version.
        if isinstance(config, dict):
            upsample_size = config['heatmap_size']
        else:
            upsample_size = config.heatmap_size

        # pretrained=False: only the architecture is built here; no weights
        # are requested from timm.
        backbone = timm.create_model(
            'convnextv2_base.fcmae_ft_in22k_in1k_384',
            pretrained=False,
        )

        # Keep every child module except the final two.
        # NOTE(review): presumably the dropped children are the pre-head norm
        # and the classifier head -- confirm against timm's ConvNeXt layout.
        # Attribute names and layer order are kept as they were so that
        # state_dict keys stay compatible with existing checkpoints.
        self.feature_extractor = nn.Sequential(*list(backbone.children())[:-2])

        in_channels = backbone.num_features
        self.head = nn.Sequential(
            nn.Conv2d(in_channels, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Resize to the configured heatmap size before the projection.
            nn.Upsample(size=upsample_size, mode='bilinear', align_corners=False),
            # 1x1 convolution down to a single heatmap channel.
            nn.Conv2d(256, 1, kernel_size=1),
        )

    def forward(self, image: torch.Tensor) -> torch.Tensor:
        """Compute the heatmap for ``image``.

        Args:
            image: Input batch passed to the feature extractor
                (assumes an (N, 3, H, W) image tensor -- TODO confirm).

        Returns:
            The head's output: a tensor with one channel whose spatial size
            is the configured ``heatmap_size``.
        """
        features = self.feature_extractor(image)
        heatmap = self.head(features)
        return heatmap