aboutsummaryrefslogtreecommitdiff
path: root/swr2_asr/utils/tokenizer.py
diff options
context:
space:
mode:
Diffstat (limited to 'swr2_asr/utils/tokenizer.py')
-rw-r--r--swr2_asr/utils/tokenizer.py12
1 files changed, 0 insertions, 12 deletions
diff --git a/swr2_asr/utils/tokenizer.py b/swr2_asr/utils/tokenizer.py
index 22569eb..1cc7b84 100644
--- a/swr2_asr/utils/tokenizer.py
+++ b/swr2_asr/utils/tokenizer.py
@@ -120,15 +120,3 @@ class CharTokenizer:
load_tokenizer.char_map[char] = int(index)
load_tokenizer.index_map[int(index)] = char
return load_tokenizer
-
-
-if __name__ == "__main__":
- tokenizer = CharTokenizer.train("/Volumes/pherkel 1/SWR2-ASR", "mls_german_opus")
- print(tokenizer.char_map)
- print(tokenizer.index_map)
- print(tokenizer.get_vocab_size())
- print(tokenizer.get_blank_token())
- print(tokenizer.get_unk_token())
- print(tokenizer.get_space_token())
- print(tokenizer.encode("hallo welt"))
- print(tokenizer.decode([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))