Update tokenizer_config.json

#2
No description provided.

I ran into a problem when loading it that way, with either `pipeline` or `AutoTokenizer`.

yigitbekir changed pull request status to merged

I'll debug this shortly (though this is normally the proper way of doing it):

from transformers import AutoTokenizer, AutoModel

# Load the tokenizer and model

tokenizer = AutoTokenizer.from_pretrained("yigitbekir/Bert2D-cased-Turkish-128K-WWM-NSW2", trust_remote_code=True)
model = AutoModel.from_pretrained("yigitbekir/Bert2D-cased-Turkish-128K-WWM-NSW2", trust_remote_code=True)

# Example text

text = "Türkiye'nin başkenti Ankara'dır."
inputs = tokenizer(text, return_tensors="pt")

# Get model outputs

outputs = model(**inputs)
last_hidden_states = outputs.last_hidden_state

ERROR

ValueError Traceback (most recent call last)
Cell In[1], line 4
1 from transformers import AutoTokenizer, AutoModel
3 # Load the tokenizer and model
----> 4 tokenizer = AutoTokenizer.from_pretrained("yigitbekir/Bert2D-cased-Turkish-128K-WWM-NSW2", trust_remote_code=True)
5 model = AutoModel.from_pretrained("yigitbekir/Bert2D-cased-Turkish-128K-WWM-NSW2", trust_remote_code=True)
7 # Example text

File ~/PycharmProjects/use_bert2d/.venv/lib/python3.13/site-packages/transformers/models/auto/tokenization_auto.py:994, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
992 else:
993 class_ref = tokenizer_auto_map[0]
--> 994 tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
995 _ = kwargs.pop("code_revision", None)
996 if os.path.isdir(pretrained_model_name_or_path):

File ~/PycharmProjects/use_bert2d/.venv/lib/python3.13/site-packages/transformers/dynamic_module_utils.py:564, in get_class_from_dynamic_module(class_reference, pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, repo_type, code_revision, **kwargs)
562 else:
563 repo_id = pretrained_model_name_or_path
--> 564 module_file, class_name = class_reference.split(".")
566 if code_revision is None and pretrained_model_name_or_path == repo_id:
567 code_revision = revision

ValueError: not enough values to unpack (expected 2, got 1)

Sign up or log in to comment