llmguard / app / detectors / toxicity.py
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch


class ToxicityDetector:
    """Flags toxic language in a prompt using the unitary/toxic-bert classifier."""

    def __init__(self):
        model_name = "unitary/toxic-bert"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.model.eval()
        # Take the label names from the model config so they always match the
        # count and order of the classifier's output logits.
        self.labels = [
            self.model.config.id2label[i]
            for i in range(self.model.config.num_labels)
        ]

    def detect(self, prompt):
        # Tokenize and run a single forward pass without tracking gradients.
        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True)
        with torch.no_grad():
            outputs = self.model(**inputs)
        # toxic-bert is multi-label, so apply a per-class sigmoid rather than softmax.
        scores = torch.sigmoid(outputs.logits).squeeze(0).tolist()
        # Report only the classes scoring above the 0.3 threshold.
        results = [
            {"label": label, "score": round(score, 3)}
            for label, score in zip(self.labels, scores)
            if score > 0.3
        ]
        return {
            "label": "Toxic" if results else "Safe",
            "details": results,
        }
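

# Illustrative usage sketch, not part of the original module: it shows how the
# detector might be called on a sample prompt and how its result dictionary can
# be read. The sample text and the __main__ guard are assumptions added for
# demonstration only.
if __name__ == "__main__":
    detector = ToxicityDetector()
    result = detector.detect("You are completely useless and everyone hates you.")
    print(result["label"])  # "Toxic" or "Safe"
    for finding in result["details"]:
        print(f'{finding["label"]}: {finding["score"]}')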