IndexError: piece id is out of range.
#1 opened by mgutierrezc442
Hi! I'm running into an error when using your model in the following case. Below is the code called by obtain_response, simplified so that all the inputs used are visible:
model_id = "hkust-nlp/dart-math-mistral-7b-uniform"
tokenizer = AutoTokenizer.from_pretrained(
model_id,
use_fast=False,
trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
model_id,
device_map=device_map,
cache_dir=model_dir
)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id
raw_input = f"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{input_question}\n\n### Response: Let's think step by step."
input_question = "Zoey and Sydney are having a watermelon seed spitting contest. Whoever spits their seeds the most total distance wins. They each get one watermelon. Zoey's has 40 seeds and she spits each one 10 feet. Sydney's has 35 she spits each one 12 feet. What is the average total distance spat?"
output = model.generate(**input_ids,
max_new_tokens=max_new_tokens,
do_sample=False, # disable sampling entirely
num_beams=1) # single‐beam greedy search)
The full error is below:
Traceback (most recent call last):
File "/sfs/gpfs/tardis/home/sgw3fy/jobs/lm_alignment_jobs/lm_alignment/inference.py", line 184, in main
eval_model(config)
File "/sfs/gpfs/tardis/home/sgw3fy/jobs/lm_alignment_jobs/lm_alignment/inference.py", line 151, in eval_model
raw_response = obtain_response(instruction_thinking, dataset, row["source"], model, tokenizer, examples=row["examples"],
File "/sfs/gpfs/tardis/home/sgw3fy/jobs/lm_alignment_jobs/lm_alignment/src/lm_models/inference_models.py", line 89, in obtain_response
return tokenizer.decode(output[0], skip_special_tokens=True)
File "/scratch/sgw3fy/envs/lm_envs/PAD/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 3870, in decode
return self._decode(
File "/scratch/sgw3fy/envs/lm_envs/PAD/lib/python3.10/site-packages/transformers/tokenization_utils.py", line 1090, in _decode
filtered_tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens=skip_special_tokens)
File "/scratch/sgw3fy/envs/lm_envs/PAD/lib/python3.10/site-packages/transformers/tokenization_utils.py", line 1071, in convert_ids_to_tokens
tokens.append(self._convert_id_to_token(index))
File "/scratch/sgw3fy/envs/lm_envs/PAD/lib/python3.10/site-packages/transformers/models/llama/tokenization_llama.py", line 276, in _convert_id_to_token
token = self.sp_model.IdToPiece(index)
File "/scratch/sgw3fy/envs/lm_envs/PAD/lib/python3.10/site-packages/sentencepiece/__init__.py", line 1179, in _batched_func
return _func(self, arg)
File "/scratch/sgw3fy/envs/lm_envs/PAD/lib/python3.10/site-packages/sentencepiece/__init__.py", line 1172, in _func
raise IndexError('piece id is out of range.')
IndexError: piece id is out of range.
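In case it helps with debugging, here is a minimal standalone sketch I put together to narrow this down (my own diagnostic, not from the model card; the short prompt is arbitrary). It compares the raw SentencePiece vocabulary size with len(tokenizer) and model.config.vocab_size, and checks whether generate() emits any id that the slow tokenizer cannot decode:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "hkust-nlp/dart-math-mistral-7b-uniform"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

# ids the slow tokenizer can actually decode via sp_model.IdToPiece
sp_size = tokenizer.sp_model.get_piece_size()
print("sentencepiece size:", sp_size)
print("len(tokenizer):    ", len(tokenizer))
print("model vocab_size:  ", model.config.vocab_size)

inputs = tokenizer("2 + 2 = ?", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=32, do_sample=False)

# any id >= sp_size would make sp_model.IdToPiece raise
# "piece id is out of range." inside tokenizer.decode()
bad = [i for i in output[0].tolist() if i >= sp_size]
print("out-of-range ids:", bad)

If the out-of-range list is non-empty, a mismatch between the model's output vocabulary and the tokenizer's SentencePiece model would explain the IndexError in the traceback above.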