|
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import torch |
|
import re |
|
|
|
|
|
# Pretrained DistilRoBERTa fine-tuned for bot/human text classification.
# Both are fetched from the Hugging Face Hub (network + disk cache) on first run.
tokenizer = AutoTokenizer.from_pretrained("junaid1993/distilroberta-bot-detection")

model = AutoModelForSequenceClassification.from_pretrained("junaid1993/distilroberta-bot-detection")
|
|
|
def preprocess_text(text):
    """Normalize raw text before tokenization for the bot-detection model.

    Removes URLs, punctuation/symbols (including '@' and '#'), and digits,
    collapses runs of whitespace, and lowercases the result.

    Args:
        text: Candidate input; any non-string value is treated as empty.

    Returns:
        The cleaned, lowercased string ("" for non-string input).
    """
    if not isinstance(text, str):
        return ""
    # IGNORECASE so uppercase URLs ("HTTP://...", "WWW. ...") are stripped too;
    # the previous pattern only matched lowercase and let them leak through.
    text = re.sub(r'http\S+|www\.\S+', '', text, flags=re.IGNORECASE)
    # Drop all punctuation/symbols; this also covers '@' and '#', so no
    # separate pass for those is needed.
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d+', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text.lower()
|
|
|
def predict_bot(text):
    """Classify *text* as bot-generated or human-written.

    The text is cleaned via ``preprocess_text``, tokenized (truncated to
    512 tokens), and scored by the global ``model``.

    Returns:
        dict with keys ``"prediction"`` ("Bot"/"Human") and
        ``"bot_probability"`` (softmax probability of class index 1).
    """
    cleaned = preprocess_text(text)
    encoded = tokenizer(cleaned, return_tensors="pt", truncation=True, max_length=512)

    # Inference only -- no gradient tracking needed.
    with torch.no_grad():
        logits = model(**encoded).logits

    probs = torch.nn.functional.softmax(logits, dim=-1)
    # NOTE(review): assumes label index 1 is the "bot" class -- confirm
    # against the model's id2label mapping.
    bot_prob = probs[0][1].item()

    if bot_prob > 0.5:
        label = "Bot"
    else:
        label = "Human"

    return {"prediction": label, "bot_probability": bot_prob}
|
|
|
|
|
# Quick smoke test: one spammy-looking example and one ordinary sentence.
examples = [
    # Repaired mojibake: the literal previously read "π₯", a garbled
    # encoding of the fire emoji.
    "\U0001F525 AMAZING DEAL! Get 90% OFF now!",
    "Just finished reading a great book about AI.",
]

for text in examples:
    result = predict_bot(text)
    print(f"Text: {text}")
    print(f"Prediction: {result['prediction']} ({result['bot_probability']:.3f})")
    print("-" * 50)
|