"""Load a fine-tuned news-topic classifier and its tokenizer for inference."""

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Hugging Face Hub repository of the fine-tuned news classifier.
model_name = "data-silence/news_classifier_ft"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Prefer GPU when available; this script is inference-only, so switch the
# model to eval mode (disables dropout/batch-norm training behavior).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

# Maps the model's output class indices to human-readable topic labels.
id2label = {
    0: 'climate', 1: 'conflicts', 2: 'culture', 3: 'economy', 4: 'gloss',
    5: 'health', 6: 'politics', 7: 'science', 8: 'society', 9: 'sports', 10: 'travel',
}
|
|
|
|
def predict(text):
    """Classify a news text into one of the topics defined in ``id2label``.

    Args:
        text: Raw news-article text to classify.

    Returns:
        A dict with a single key ``"label"`` holding the predicted topic name.
    """
    # Tokenize and move input tensors to the same device as the model.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)

    # Inference only: skip gradient tracking to save memory and compute.
    with torch.no_grad():
        outputs = model(**inputs)

    # argmax over the class dimension picks the most likely label id.
    predicted_label_id = outputs.logits.argmax(-1).item()
    predicted_label = id2label[predicted_label_id]
    return {"label": predicted_label}