|
|
|
import torch.nn as nn |
|
|
|
class AestheticScorer(nn.Module):
    """Backbone plus seven independent linear heads, one per aesthetic attribute.

    Intended as a fine-tuned CLIP model that predicts aesthetic scores
    (e.g., light, depth, composition) based on the PARA dataset.

    Each head is an ``nn.Sequential`` holding a single
    ``nn.Linear(hidden_size, 1)``, so its parameters are named
    ``<attribute>_head.0.weight`` and ``<attribute>_head.0.bias``.
    """

    def __init__(self, backbone):
        """Store the backbone and build the scoring heads.

        Args:
            backbone: Feature extractor. It must expose
                ``config.hidden_size`` and, when called, return an object
                that has a ``pooler_output`` attribute.
        """
        super().__init__()
        self.backbone = backbone

        width = backbone.config.hidden_size

        def make_head():
            # Wrapped in Sequential so the layer is registered under index 0.
            return nn.Sequential(nn.Linear(width, 1))

        # Creation order is deliberate: it fixes the parameter registration
        # order and the order in which random initial weights are drawn.
        self.aesthetic_head = make_head()
        self.quality_head = make_head()
        self.composition_head = make_head()
        self.light_head = make_head()
        self.color_head = make_head()
        self.dof_head = make_head()
        self.content_head = make_head()

    def forward(self, pixel_values):
        """Run the backbone once and score its pooled output with every head.

        Args:
            pixel_values: Input handed unchanged to the backbone
                (presumably a batch of preprocessed images -- confirm
                against the caller).

        Returns:
            A 7-tuple of head outputs ordered as (aesthetic, quality,
            composition, light, color, dof, content).
        """
        pooled = self.backbone(pixel_values).pooler_output
        heads = (
            self.aesthetic_head,
            self.quality_head,
            self.composition_head,
            self.light_head,
            self.color_head,
            self.dof_head,
            self.content_head,
        )
        return tuple(head(pooled) for head in heads)
|
|