From 80544737bc4338bd0cde305b8bccb7c5209e1bdc Mon Sep 17 00:00:00 2001
From: JoJoBarthold2
Date: Sat, 16 Sep 2023 15:23:56 +0200
Subject: added a method to create a txt for the ctc decoder

---
 swr2_asr/utils/tokenizer.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'swr2_asr/utils/tokenizer.py')

diff --git a/swr2_asr/utils/tokenizer.py b/swr2_asr/utils/tokenizer.py
index 1cc7b84..ee89cdb 100644
--- a/swr2_asr/utils/tokenizer.py
+++ b/swr2_asr/utils/tokenizer.py
@@ -120,3 +120,9 @@ class CharTokenizer:
             load_tokenizer.char_map[char] = int(index)
             load_tokenizer.index_map[int(index)] = char
         return load_tokenizer
+
+    def create_txt(self, path: str):
+        """Write every token of ``char_map`` to the text file at ``path``, one per line."""
+        with open(path, "w", encoding="utf-8") as file:
+            for key in self.char_map:
+                file.write(f"{key}\n")
-- 
cgit v1.2.3