from typing import Optional

import torch
import torch.nn as nn
from transformers import RobertaModel, RobertaPreTrainedModel


class MyRobertaForSequenceClassification(RobertaPreTrainedModel):
    """RoBERTa encoder with a linear classification head over the <s> (CLS) token."""

    def __init__(self, config):
        super().__init__(config)
        # Number of target classes; fall back to 4 if the config does not set it.
        self.num_labels = getattr(config, "num_labels", 4)
        # Encoder backbone without the built-in pooler; pooling is handled in _pool().
        self.roberta = RobertaModel(config, add_pooling_layer=False)
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)
        self.loss_fn = nn.CrossEntropyLoss()
        # Initialize weights and apply final processing (Hugging Face convention).
        self.post_init()

    def _get_pad_id(self, input_ids):
        # Prefer the pad token id from the model config; RoBERTa's default is 1.
        pad_id = getattr(self.config, "pad_token_id", None)
        return pad_id if pad_id is not None else 1

    def _pool(self, last_hidden_state, attention_mask, model_type: str = "roberta"):
        if last_hidden_state.dim() == 3:
            # Encoders with a [CLS]/<s> sentence token: use its final hidden state.
            if model_type in {"bert", "roberta", "deberta", "xlm-roberta", "electra"}:
                return last_hidden_state[:, 0, :]
            # Otherwise fall back to masked mean pooling over the sequence length.
            mask = attention_mask.unsqueeze(-1).float()
            summed = (last_hidden_state * mask).sum(dim=1)
            denom = mask.sum(dim=1).clamp(min=1e-9)
            return summed / denom
        elif last_hidden_state.dim() == 2:
            # Already pooled to (batch, hidden); pass through unchanged.
            return last_hidden_state
        else:
            raise ValueError(f"Unexpected hidden dim: {last_hidden_state.dim()}")

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        labels: Optional[torch.LongTensor] = None,
        **kwargs
    ):
        # Build the attention mask from padding positions if the caller omitted it.
        if attention_mask is None and input_ids is not None:
            pad_id = self._get_pad_id(input_ids)
            attention_mask = input_ids.ne(pad_id)

        outputs = self.roberta(
            input_ids=input_ids, attention_mask=attention_mask, return_dict=True
        )
        hidden = outputs.last_hidden_state
        pooled = self._pool(hidden, attention_mask, "roberta")
        logits = self.classifier(pooled)

        # Compute the cross-entropy loss only when labels are provided.
        loss = None
        if labels is not None:
            loss = self.loss_fn(logits, labels.long())

        return {"loss": loss, "logits": logits}
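

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the model itself). Assumptions: the
# "roberta-base" checkpoint name and the 4-way label count are illustrative
# placeholders; substitute the checkpoint and num_labels your task uses.
# The classification head is freshly initialized, so the logits are only
# meaningful after fine-tuning.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("roberta-base")
    model = MyRobertaForSequenceClassification.from_pretrained(
        "roberta-base", num_labels=4
    )
    model.eval()

    batch = tokenizer(
        ["an example sentence", "another example"],
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        out = model(**batch)
    print(out["logits"].shape)  # expected: torch.Size([2, 4])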