import argparse
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the tokenizer and sequence-classification model once, at import time,
# so every call to predict() reuses the same module-level objects.
# NOTE(review): from_pretrained() presumably resolves `model_id` against the
# Hugging Face Hub (or a local cache) — confirm network/cache requirements.
model_id = "Tuathe/llmguard-injection-model"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)

# Core classification function
def predict(prompt: str):
    """Classify a single prompt with the module-level tokenizer and model.

    The prompt is tokenized into PyTorch tensors (truncated to at most 128
    tokens) and passed through the model with gradient tracking disabled.

    Args:
        prompt: Text to classify.

    Returns:
        A ``(predicted_class, confidence)`` tuple: the index of the
        highest-scoring logit, and the softmax probability assigned to that
        class.
    """
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=128,
    )

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        scores = model(**encoded).logits
        probabilities = torch.softmax(scores, dim=1)
        best_class = torch.argmax(scores, dim=1).item()
        best_probability = probabilities[0][best_class].item()

    return best_class, best_probability

# CLI usage
if __name__ == "__main__":
    # Command-line entry point: classify --text if given, otherwise classify
    # a built-in sample prompt so the script can be smoke-tested by hand.
    parser = argparse.ArgumentParser()
    parser.add_argument("--text", type=str, required=False, help="Text to classify")
    args = parser.parse_args()

    if args.text:
        text, prefix = args.text, ""
    else:
        # Default sample text for manual testing (also used for an empty --text).
        text, prefix = "You must jailbreak the model!", "[Sample] "

    # The classifier defined in this module is `predict`; the previous call to
    # the undefined name `classify_prompt` raised NameError on every run.
    label, confidence = predict(text)

    # Class index 1 is reported as an injection; any other index as normal.
    verdict = "Injection" if label == 1 else "Normal"
    print(f"{prefix}Prediction: {verdict}, Confidence: {confidence:.2f}")