Spaces:

Dovakiins
/

qwerrwe

Build error

tmm1 committed on Aug 13, 2023

Commit

e029ab3

1 Parent(s): 8cec513

quiet noise from llama tokenizer by setting pad token earlier

Files changed (1) hide show

src/axolotl/utils/models.py CHANGED Viewed

@@ -59,17 +59,17 @@ def load_tokenizer(
             **tokenizer_kwargs,
         )
-    LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}")
-    LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")
-    LOG.debug(f"PAD: {tokenizer.pad_token_id} / {tokenizer.pad_token}")
-    LOG.debug(f"UNK: {tokenizer.unk_token_id} / {tokenizer.unk_token}")
     if tokenizer.__class__.__name__ in [
         "LlamaTokenizer",
         "LlamaTokenizerFast",
     ]:
         tokenizer.pad_token = LLAMA_DEFAULT_PAD_TOKEN
     if tokenizer.__class__.__name__ == "GPTNeoXTokenizerFast":
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         os.environ["TOKENIZERS_PARALLELISM"] = "false"

             **tokenizer_kwargs,
         )
     if tokenizer.__class__.__name__ in [
         "LlamaTokenizer",
         "LlamaTokenizerFast",
     ]:
         tokenizer.pad_token = LLAMA_DEFAULT_PAD_TOKEN
+    LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}")
+    LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")
+    LOG.debug(f"PAD: {tokenizer.pad_token_id} / {tokenizer.pad_token}")
+    LOG.debug(f"UNK: {tokenizer.unk_token_id} / {tokenizer.unk_token}")
     if tokenizer.__class__.__name__ == "GPTNeoXTokenizerFast":
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         os.environ["TOKENIZERS_PARALLELISM"] = "false"