File size: 1,583 Bytes
b4f16a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# app/utils/hf_model_wrapper.py
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import os
# Replace with your HF model id if different
MODEL_ID = os.getenv("LLMGUARD_HF_MODEL", "Tuathe/llmguard-injection-model")
# Lazy load pattern — load once per process
_tokenizer = None
_model = None
_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def _load_model():
global _tokenizer, _model
if _tokenizer is None or _model is None:
# This will download from HF if the model isn't cached locally.
_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
_model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
_model.to(_device)
_model.eval()
return _tokenizer, _model
def classify_prompt(prompt: str, max_length: int = 128):
"""
Classifies prompt. Returns (label_str, confidence_float)
label_str in {"Injection", "Safe"}
"""
tokenizer, model = _load_model()
inputs = tokenizer(
prompt,
return_tensors="pt",
truncation=True,
padding=True,
max_length=max_length
)
# move tensors to device
inputs = {k: v.to(_device) for k, v in inputs.items()}
with torch.no_grad():
outputs = model(**inputs)
logits = outputs.logits
probs = torch.softmax(logits, dim=1)
predicted = torch.argmax(probs, dim=1).item()
confidence = float(probs[0][predicted].cpu().item())
label_str = "Injection" if predicted == 1 else "Safe"
return label_str, confidence
|