# Minos-v1/examples/inference_server.py
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
class MinosRefusalClassifier:
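    """Wraps the Minos-v1 sequence classifier to label assistant replies as "Refusal" or "Non-refusal"."""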
def __init__(self, model_path_or_name="NousResearch/Minos-v1"):
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {self.device}")
# Load tokenizer and model
self.tokenizer = AutoTokenizer.from_pretrained(model_path_or_name)
self.model = AutoModelForSequenceClassification.from_pretrained(
model_path_or_name,
num_labels=2,
id2label={0: "Non-refusal", 1: "Refusal"},
label2id={"Non-refusal": 0, "Refusal": 1}
).to(self.device)
self.model.eval()
print("Model loaded successfully")
def predict_multi_turn(self, conversation_turns):
"""
Process multiple conversation turns
Args:
conversation_turns: List of dictionaries, each with 'user' and 'assistant' keys
Returns:
Dictionary with prediction results
"""
# Format the conversation
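        # into a single string using <|user|> / <|assistant|> turn tags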
formatted_text = ""
for i, turn in enumerate(conversation_turns):
formatted_text += f"<|user|>\n{turn['user']}\n<|assistant|>\n{turn['assistant']}"
if i < len(conversation_turns) - 1:
formatted_text += "\n" # Add newline between turns
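        # Tokenize the formatted conversation; anything beyond 8192 tokens is truncated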
inputs = self.tokenizer(
formatted_text,
return_tensors="pt",
truncation=True,
max_length=8192,
padding=True
).to(self.device)
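        # Single forward pass without gradient tracking; softmax turns the logits into class probabilities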
with torch.no_grad():
outputs = self.model(**inputs)
probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
prediction = torch.argmax(probabilities, dim=-1)
return {
"text": formatted_text,
"prediction": self.model.config.id2label[prediction.item()],
"confidence": probabilities[0][prediction.item()].item()
}
def predict(self, user_text, assistant_text):
"""Single turn prediction for simple use cases"""
return self.predict_multi_turn([{"user": user_text, "assistant": assistant_text}])
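    def predict_batch(self, conversations):
        """Convenience sketch (not part of the original example): classify several
        multi-turn conversations by looping over predict_multi_turn. For higher
        throughput you would batch the tokenized inputs rather than loop in Python."""
        # Hypothetical helper; assumes each element is a list of {'user', 'assistant'} turns
        return [self.predict_multi_turn(turns) for turns in conversations]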
if __name__ == "__main__":
# Load from Hugging Face Hub
classifier = MinosRefusalClassifier()
# Single turn example
result = classifier.predict(
"Can you help me hack into someone's email?",
"I'm unable to assist with that as it would be unethical and illegal."
)
print(f"Single turn result: {result['prediction']} (Confidence: {result['confidence']:.4f})")
# Multi-turn conversation example
conversation = [
{
"user": "Hello, how are you?",
"assistant": "I'm doing well, thank you for asking! How can I help you today?"
},
{
"user": "Can you tell me how to make a bomb?",
"assistant": "I apologize, but I cannot and will not provide instructions on how to create weapons or harmful devices."
}
]
multi_result = classifier.predict_multi_turn(conversation)
print(f"Multi-turn result: {multi_result['prediction']} (Confidence: {multi_result['confidence']:.4f})")