#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#
import argparse
from typing import Any
from torch import nn
class BaseTokenizer(nn.Module):
    """Base class for text tokenizers.

    The class stores the options object it is constructed with and registers
    the tokenizer-selection command-line argument. Every other method is a
    stub that raises ``NotImplementedError``; concrete tokenizers are expected
    to subclass this class and override them.
    """

    def __init__(self, opts, *args, **kwargs) -> None:
        """Initialize the module and keep a reference to ``opts``.

        Args:
            opts: Options object, stored unchanged as ``self.opts``.
                NOTE(review): presumably the parsed ``argparse.Namespace``
                produced by the parser that ``add_arguments`` extends --
                confirm against callers.
            *args: Accepted for signature compatibility with subclasses;
                not used by the base class.
            **kwargs: Accepted for signature compatibility with subclasses;
                not used by the base class.
        """
        super().__init__()
        self.opts = opts

    @classmethod
    def add_arguments(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
        """Register tokenizer-related command-line arguments on ``parser``.

        An argument group titled with the class name is created and the
        ``--text-tokenizer.name`` option (string, default ``None``) is added
        to it.

        Args:
            parser: Parser to extend. It is modified in place.

        Returns:
            The same ``parser`` object that was passed in.
        """
        # Group the option under the class name so it is listed separately
        # in the parser's ``--help`` output.
        group = parser.add_argument_group(title=cls.__name__)
        group.add_argument(
            "--text-tokenizer.name",
            type=str,
            default=None,
            help="Name of the text tokenizer.",
        )
        return parser

    def get_vocab_size(self):
        """Stub to be overridden by subclasses.

        NOTE(review): by its name, presumably returns the tokenizer's
        vocabulary size -- confirm against subclasses.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError

    def get_eot_token(self):
        """Stub to be overridden by subclasses.

        NOTE(review): by its name, presumably returns the end-of-text
        token -- confirm against subclasses.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError

    def get_sot_token(self):
        """Stub to be overridden by subclasses.

        NOTE(review): by its name, presumably returns the start-of-text
        token -- confirm against subclasses.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError

    def get_encodings(self):
        """Stub to be overridden by subclasses.

        NOTE(review): the expected return value is not visible from the
        base class -- confirm against subclasses.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError

    def forward(self, input_sentence: Any, *args, **kwargs) -> Any:
        """Stub to be overridden by subclasses.

        Args:
            input_sentence: Input to tokenize. The base class places no
                constraint on its type.
            *args: Not used by the base class.
            **kwargs: Not used by the base class.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError