From 9475900a1085b8277808b0a0b1555c59f7eb6d36 Mon Sep 17 00:00:00 2001
From: Pherkel
Date: Mon, 18 Sep 2023 12:44:34 +0200
Subject: small fixes

---
 swr2_asr/utils/tokenizer.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'swr2_asr/utils/tokenizer.py')

diff --git a/swr2_asr/utils/tokenizer.py b/swr2_asr/utils/tokenizer.py
index 9abf57d..b1de83e 100644
--- a/swr2_asr/utils/tokenizer.py
+++ b/swr2_asr/utils/tokenizer.py
@@ -120,11 +120,9 @@ class CharTokenizer:
                 load_tokenizer.char_map[char] = int(index)
                 load_tokenizer.index_map[int(index)] = char
         return load_tokenizer
-    
-    
-    #TO DO check about the weird unknown tokens etc.
-    def create_txt(self,path:str):
-        with open(path, 'w',encoding="utf-8") as file:
-            for key,value in self.char_map():
-                file.write(f"{key}\n")
-        
\ No newline at end of file
+
+    def create_tokens_txt(self, path: str):
+        """Create a txt file with all the characters"""
+        with open(path, "w", encoding="utf-8") as file:
+            for char, _ in self.char_map.items():
+                file.write(f"{char}\n")
-- 
cgit v1.2.3