import torch.nn as nn


class AestheticScorer(nn.Module):
    """Fine-tuned CLIP model to predict aesthetic scores.

    Predicts several scores (e.g., light, depth, composition) based on the
    PARA dataset. A shared backbone produces one pooled feature vector per
    input, and seven independent linear heads each map that vector to a
    single scalar score.

    The heads are, in output order: ``aesthetic_head``, ``quality_head``,
    ``composition_head``, ``light_head``, ``color_head``, ``dof_head`` and
    ``content_head``.
    """

    def __init__(self, backbone):
        """Attach the backbone and create one scoring head per attribute.

        Args:
            backbone: Feature extractor. It must expose
                ``config.hidden_size`` and, when called with pixel values,
                return an object with a ``pooler_output`` attribute.
        """
        super().__init__()
        self.backbone = backbone

        # Define the scoring heads. Every head has the same shape; they are
        # created in a fixed order so parameter registration (and therefore
        # ``state_dict`` key order and seeded initialisation) is stable.
        hidden_dim = backbone.config.hidden_size
        self.aesthetic_head = self._make_head(hidden_dim)
        self.quality_head = self._make_head(hidden_dim)
        self.composition_head = self._make_head(hidden_dim)
        self.light_head = self._make_head(hidden_dim)
        self.color_head = self._make_head(hidden_dim)
        # NOTE(review): "dof" presumably means depth of field - confirm
        # against the dataset's label names.
        self.dof_head = self._make_head(hidden_dim)
        self.content_head = self._make_head(hidden_dim)

    @staticmethod
    def _make_head(hidden_dim):
        """Build one scoring head: a single linear layer to a scalar."""
        # Kept wrapped in nn.Sequential so parameter names stay
        # "<head>.0.weight" / "<head>.0.bias" and existing checkpoints load.
        return nn.Sequential(
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, pixel_values):
        """Score a batch of inputs with every head.

        Args:
            pixel_values: Input passed unchanged to the backbone.

        Returns:
            A 7-tuple of head outputs in the order (aesthetic, quality,
            composition, light, color, dof, content). Assuming the pooled
            features are ``(batch, hidden_size)``, each element is
            ``(batch, 1)`` - TODO confirm against the backbone in use.
        """
        features = self.backbone(pixel_values).pooler_output
        return (
            self.aesthetic_head(features),
            self.quality_head(features),
            self.composition_head(features),
            self.light_head(features),
            self.color_head(features),
            self.dof_head(features),
            self.content_head(features),
        )