matx.text.wordpiece_tokenizer module

class matx.text.wordpiece_tokenizer.WordPieceTokenizer(vocab_path: str, lookup_id: bool = True, unk_token: Any = '[UNK]', subwords_prefix: str = '##', skip_empty: bool = True, max_bytes_per_token: int = 100)[source]

Bases: object

__init__(vocab_path: str, lookup_id: bool = True, unk_token: Any = '[UNK]', subwords_prefix: str = '##', skip_empty: bool = True, max_bytes_per_token: int = 100) → None[source]
tokenize(sentence: List) → List[source]
tokenizer_with_meta(sentence: List) → Tuple[List, List[int]][source]
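
A minimal usage sketch based only on the constructor and method signatures above; the vocabulary path "vocab.txt" and the example words are placeholders, and the exact contents of the returned list (subword ids when lookup_id=True, subword strings otherwise) are an assumption rather than documented behavior.

>>> from matx.text.wordpiece_tokenizer import WordPieceTokenizer
>>> # vocab.txt is assumed to hold one WordPiece vocabulary entry per line
>>> tokenizer = WordPieceTokenizer(
...     vocab_path="vocab.txt",    # placeholder path to a WordPiece vocabulary file
...     lookup_id=True,            # map subword strings to vocabulary ids
...     unk_token="[UNK]",         # fallback for out-of-vocabulary pieces
...     subwords_prefix="##",      # prefix attached to non-initial subwords
... )
>>> # tokenize() takes a list of pre-split words and returns a flat list of subwords/ids
>>> ids = tokenizer.tokenize(["unaffable", "hello"])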
class matx.text.wordpiece_tokenizer.WordPieceTokenizerImpl(vocab_path: str, lookup_id: bool = True, unk_token: Any = '[UNK]', subwords_prefix: str = '##', skip_empty: bool = True, max_bytes_per_token: int = 100)[source]

Bases: object

__init__(vocab_path: str, lookup_id: bool = True, unk_token: Any = '[UNK]', subwords_prefix: str = '##', skip_empty: bool = True, max_bytes_per_token: int = 100) → None[source]
tokenize(sentence: List) → List[source]
tokenize_with_meta(sentence: List) → Tuple[List, List[int]][source]
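
WordPieceTokenizerImpl takes the same constructor arguments; per the signature above, tokenize_with_meta() returns an integer list alongside the subwords. A sketch under the same assumptions as the previous example; the meaning of the integer metadata list is not documented here and is left uninterpreted.

>>> from matx.text.wordpiece_tokenizer import WordPieceTokenizerImpl
>>> impl = WordPieceTokenizerImpl(vocab_path="vocab.txt")  # placeholder vocabulary path
>>> # tokenize_with_meta() returns the subword list plus an integer metadata list
>>> subwords, meta = impl.tokenize_with_meta(["unaffable", "hello"])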