# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from abc import ABC, abstractmethod | |
class Tokenizer(ABC):
    """Basic tokenizer interface, mainly for typing purposes.

    The base class only stores a word count and provides stub ``encode`` /
    ``decode`` methods whose bodies are empty; concrete tokenizers are
    expected to subclass this and override them.

    NOTE(review): the methods below carry no ``@abstractmethod`` decorator,
    so the base class can be instantiated and its stubs return ``None``.
    Confirm whether they were meant to be abstract before changing this,
    since doing so would alter behaviour for existing callers.
    """

    def __init__(self):
        """Initialise the stored word count."""
        # NOTE(review): 8 is presumably a placeholder that subclasses
        # overwrite with their real vocabulary size -- confirm.
        self._n_words = 8

    def encode(self, *args, **kwargs) -> list[int]:
        """Stub for encoding; subclasses should override.

        Accepts any positional/keyword arguments. The base implementation
        has an empty body and therefore returns ``None``; the annotation
        states that implementations return a list of ints.
        """
        ...

    def decode(self, *args, **kwargs) -> str:
        """Stub for decoding; subclasses should override.

        Accepts any positional/keyword arguments. The base implementation
        has an empty body and therefore returns ``None``; the annotation
        states that implementations return a string.
        """
        ...

    def n_words(self) -> int:
        """Return the word count stored in ``self._n_words``."""
        return self._n_words