Spaces:

Tuathe
/

llmguard

Sleeping

llmguard / app /utils /hf_model_wrapper.py

Deploy LLMGuard to Hugging Face Spaces

b4f16a5 about 1 month ago

1.58 kB

	# app/utils/hf_model_wrapper.py
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch
	import os

	# Replace with your HF model id if different
	MODEL_ID = os.getenv("LLMGUARD_HF_MODEL", "Tuathe/llmguard-injection-model")

	# Lazy load pattern — load once per process
	_tokenizer = None
	_model = None
	_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	def _load_model():
	global _tokenizer, _model
	if _tokenizer is None or _model is None:
	# This will download from HF if the model isn't cached locally.
	_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	_model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
	_model.to(_device)
	_model.eval()
	return _tokenizer, _model

	def classify_prompt(prompt: str, max_length: int = 128):
	"""
	Classifies prompt. Returns (label_str, confidence_float)
	label_str in {"Injection", "Safe"}
	"""
	tokenizer, model = _load_model()
	inputs = tokenizer(
	prompt,
	return_tensors="pt",
	truncation=True,
	padding=True,
	max_length=max_length
	)
	# move tensors to device
	inputs = {k: v.to(_device) for k, v in inputs.items()}
	with torch.no_grad():
	outputs = model(**inputs)
	logits = outputs.logits
	probs = torch.softmax(logits, dim=1)
	predicted = torch.argmax(probs, dim=1).item()
	confidence = float(probs[0][predicted].cpu().item())

	label_str = "Injection" if predicted == 1 else "Safe"
	return label_str, confidence