{
  "pad_token": "[PAD]",
  "unk_token": "[UNK]",
  "cls_token": "[CLS]",
  "sep_token": "[SEP]"
}