"What's wrong with using huggingface transformers?"
Here's a quick example. Am I supposed to be going in with full knowledge of the inner workings of an LLM?
import pathlib
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
tokenizer = AutoTokenizer.from_pretrained("<ModernBERT>")
# Triton is **required**, but nowhere in the documentation does it say that Triton is needed.
# Installing Triton on Windows isn't super straightforward. Thankfully someone has already built wheels for it:
# - https://github.com/woct0rdho/triton-windows/releases
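# If Triton can't be installed at all, passing reference_compile=False (the commented-out
# argument below) should make ModernBERT skip its compiled code path. Untested on my side,
# so treat it as a possible workaround rather than a guarantee.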
model = AutoModelForSequenceClassification.from_pretrained(
    "<ModernBERT>",  # reference_compile=False
)
# By default the model runs on CPU, which is slow, so move it to a CUDA device.
# This will actually error out if you use "gpu" instead of "cuda".
model = model.to("cuda")
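# A more defensive option (not what I actually ran, just a sketch) is to pick the device at runtime:
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model = model.to(device)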
with torch.no_grad():
    # Not setting `return_tensors="pt"` causes
    #   File "C:\Program Files\Python310\lib\site-packages\transformers\modeling_utils.py", line 5311, in warn_if_padding_and_no_attention_mask
    #     if self.config.pad_token_id in input_ids[:, [-1, 0]]:
    #   TypeError: list indices must be integers or slices, not tuple
    # or...
    #   File "C:\Program Files\Python310\lib\site-packages\transformers\models\modernbert\modeling_modernbert.py", line 836, in forward
    #     batch_size, seq_len = input_ids.shape[:2]
    #   AttributeError: 'list' object has no attribute 'shape'
    block = tokenizer(
        pathlib.Path("test-fic.txt").read_text("utf-8"), return_tensors="pt"
    )
    block = block.to("cuda")
    # **block is needed to fix "AttributeError: 'NoneType' object has no attribute 'unsqueeze'" on attention_mask.unsqueeze(-1)
    logits = model(**block).logits
    # Not moving to cpu will make the final .numpy() conversion fail.
    logits = logits.to("cpu")
    # print(logits)
    predicted_class_ids = torch.softmax(logits, -1)[0].numpy()
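And if you want an actual label instead of raw probabilities, there's one more step. A rough sketch, assuming the checkpoint's config actually defines an id2label mapping (not every fine-tune does):

# Map the highest-probability class index back to a label name.
# Assumes model.config.id2label is populated in this checkpoint.
predicted_class_id = int(predicted_class_ids.argmax())
print(model.config.id2label[predicted_class_id])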