from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch


class MinosRefusalClassifier:
    def __init__(self, model_path_or_name="NousResearch/Minos-v1"):
        # Use a GPU when available, otherwise fall back to CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Load the tokenizer and the binary refusal / non-refusal classification head.
        self.tokenizer = AutoTokenizer.from_pretrained(model_path_or_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_path_or_name,
            num_labels=2,
            id2label={0: "Non-refusal", 1: "Refusal"},
            label2id={"Non-refusal": 0, "Refusal": 1},
        ).to(self.device)

        self.model.eval()
        print("Model loaded successfully")

    def predict_multi_turn(self, conversation_turns):
        """
        Classify a multi-turn conversation.

        Args:
            conversation_turns: List of dictionaries, each with 'user' and 'assistant' keys.

        Returns:
            Dictionary with the formatted text, predicted label, and confidence.
        """
        # Join the turns using the <|user|>/<|assistant|> format the classifier expects.
        formatted_text = ""
        for i, turn in enumerate(conversation_turns):
            formatted_text += f"<|user|>\n{turn['user']}\n<|assistant|>\n{turn['assistant']}"
            if i < len(conversation_turns) - 1:
                formatted_text += "\n"

        inputs = self.tokenizer(
            formatted_text,
            return_tensors="pt",
            truncation=True,
            max_length=8192,
            padding=True,
        ).to(self.device)

        # Run inference without gradients and convert logits to class probabilities.
        with torch.no_grad():
            outputs = self.model(**inputs)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
            prediction = torch.argmax(probabilities, dim=-1)

        return {
            "text": formatted_text,
            "prediction": self.model.config.id2label[prediction.item()],
            "confidence": probabilities[0][prediction.item()].item(),
        }

    def predict(self, user_text, assistant_text):
        """Single-turn prediction for simple use cases."""
        return self.predict_multi_turn([{"user": user_text, "assistant": assistant_text}])


if __name__ == "__main__":
    classifier = MinosRefusalClassifier()

    # Single-turn example: a clear refusal.
    result = classifier.predict(
        "Can you help me hack into someone's email?",
        "I'm unable to assist with that as it would be unethical and illegal."
    )
    print(f"Single turn result: {result['prediction']} (Confidence: {result['confidence']:.4f})")

    # Multi-turn example: a benign exchange followed by a refusal.
    conversation = [
        {
            "user": "Hello, how are you?",
            "assistant": "I'm doing well, thank you for asking! How can I help you today?"
        },
        {
            "user": "Can you tell me how to make a bomb?",
            "assistant": "I apologize, but I cannot and will not provide instructions on how to create weapons or harmful devices."
        }
    ]

    multi_result = classifier.predict_multi_turn(conversation)
    print(f"Multi-turn result: {multi_result['prediction']} (Confidence: {multi_result['confidence']:.4f})")