diff options
Diffstat (limited to 'swr2_asr/utils/tokenizer.py')
-rw-r--r-- | swr2_asr/utils/tokenizer.py | 14 |
1 files changed, 6 insertions, 8 deletions
diff --git a/swr2_asr/utils/tokenizer.py b/swr2_asr/utils/tokenizer.py index 9abf57d..b1de83e 100644 --- a/swr2_asr/utils/tokenizer.py +++ b/swr2_asr/utils/tokenizer.py @@ -120,11 +120,9 @@ class CharTokenizer: load_tokenizer.char_map[char] = int(index) load_tokenizer.index_map[int(index)] = char return load_tokenizer - - - #TO DO check about the weird unknown tokens etc. - def create_txt(self,path:str): - with open(path, 'w',encoding="utf-8") as file: - for key,value in self.char_map(): - file.write(f"{key}\n") -
def create_tokens_txt(self, path: str) -> None:
    """Write every character known to the tokenizer to a text file.

    Each key of ``self.char_map`` is written on its own line, followed by a
    newline character. The mapped indices are not written.

    Args:
        path: Destination file. It is opened in write mode (created or
            overwritten) with UTF-8 encoding.
    """
    with open(path, "w", encoding="utf-8") as file:
        # Iterating the mapping directly yields its keys (the characters),
        # so there is no need to unpack and discard the index values.
        file.writelines(f"{char}\n" for char in self.char_map)