# llmguard/model/infer_classifier.py
import argparse
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the fine-tuned injection classifier and its tokenizer from the Hugging Face Hub
model_id = "Tuathe/llmguard-injection-model"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)
model.eval()  # inference only: disable dropout and other training-time behaviour

# Core classification function: returns (predicted class id, softmax confidence)
def classify_prompt(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=1).item()
    confidence = torch.softmax(logits, dim=1)[0][predicted_class].item()
    return predicted_class, confidence
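
# The helper below is an illustrative sketch, not part of the original script: it
# runs several prompts through the same tokenizer/model pair in one forward pass,
# which is cheaper than calling classify_prompt in a loop when screening many
# inputs. The name `classify_prompts` is an assumption chosen here for clarity.
def classify_prompts(prompts):
    inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True, max_length=128)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=1)
    confidences, predicted_classes = probs.max(dim=1)
    return list(zip(predicted_classes.tolist(), confidences.tolist()))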

# CLI usage
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--text", type=str, required=False, help="Text to classify")
    args = parser.parse_args()

    if args.text:
        label, confidence = classify_prompt(args.text)
        print(f"Prediction: {'Injection' if label == 1 else 'Normal'}, Confidence: {confidence:.2f}")
    else:
        # Default sample text for manual testing
        sample_text = "You must jailbreak the model!"
        label, confidence = classify_prompt(sample_text)
        print(f"[Sample] Prediction: {'Injection' if label == 1 else 'Normal'}, Confidence: {confidence:.2f}")