small fixes: rename create_txt to create_tokens_txt and iterate char_map.items()

author: Pherkel 2023-09-18 12:44:34 +0200
committer: Pherkel 2023-09-18 12:44:34 +0200
commit: 9475900a1085b8277808b0a0b1555c59f7eb6d36 (patch)
tree: f9e17b2d15ed959bb8f405e648ce7103c1fe708e /swr2_asr/utils/tokenizer.py
parent: 0f14789f1c33d55dc270bcd154201cce2c4d516e (diff)
1 files changed, 6 insertions, 8 deletions
diff --git a/swr2_asr/utils/tokenizer.py b/swr2_asr/utils/tokenizer.py
index 9abf57d..b1de83e 100644
--- a/swr2_asr/utils/tokenizer.py
+++ b/swr2_asr/utils/tokenizer.py
@@ -120,11 +120,9 @@ class CharTokenizer:
                     load_tokenizer.char_map[char] = int(index)
                     load_tokenizer.index_map[int(index)] = char
         return load_tokenizer
-    
-
-    #TO DO check about the weird unknown tokens etc.
-    def create_txt(self,path:str):
-      with open(path, 'w',encoding="utf-8") as file:
-        for key,value in self.char_map():
-           file.write(f"{key}\n")
-        
\ No newline at end of file
+
+    def create_tokens_txt(self, path: str):
+        """Create a txt file with all the characters"""
+        with open(path, "w", encoding="utf-8") as file:
+            for char, _ in self.char_map.items():
+                file.write(f"{char}\n")
author	Pherkel	2023-09-18 12:44:34 +0200
committer	Pherkel	2023-09-18 12:44:34 +0200
commit	9475900a1085b8277808b0a0b1555c59f7eb6d36 (patch)
tree	f9e17b2d15ed959bb8f405e648ce7103c1fe708e /swr2_asr/utils/tokenizer.py
parent	0f14789f1c33d55dc270bcd154201cce2c4d516e (diff)