diff options
Diffstat (limited to 'swr2_asr/tokenizer.py')
-rw-r--r-- | swr2_asr/tokenizer.py | 1 |
1 file changed, 1 insertion, 0 deletions
diff --git a/swr2_asr/tokenizer.py b/swr2_asr/tokenizer.py
index 5758da7..8e3bf09 100644
--- a/swr2_asr/tokenizer.py
+++ b/swr2_asr/tokenizer.py
@@ -302,6 +302,7 @@ def train_bpe_tokenizer(
         "ü",
     ]
 
+    # TODO: add padding token / whitespace token / special tokens
     trainer = BpeTrainer(
         special_tokens=["[UNK]"],
         vocab_size=vocab_size,