diff options
author | Pherkel | 2023-09-12 14:19:15 +0200 |
---|---|---|
committer | GitHub | 2023-09-12 14:19:15 +0200 |
commit | 7a9a6c783e69b5a537a3d3f5bfe8d5fdc656c807 (patch) | |
tree | 0725631b9b68aeb65b292420a15941dcfa3fc04f /data/tokenizers/char_tokenizer_german.json | |
parent | f9846193289c81d89342b6a36e951605c2cfa189 (diff) | |
parent | 7b71dab87591e04d874cd636614450b0e65e3f2b (diff) |
Merge pull request #37 from Algo-Boys/fix/ultimate
Fix/ultimate
Diffstat (limited to 'data/tokenizers/char_tokenizer_german.json')
-rw-r--r-- | data/tokenizers/char_tokenizer_german.json | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/data/tokenizers/char_tokenizer_german.json b/data/tokenizers/char_tokenizer_german.json new file mode 100644 index 0000000..20db079 --- /dev/null +++ b/data/tokenizers/char_tokenizer_german.json @@ -0,0 +1,38 @@ +_ 0 +<BLANK> 1 +<UNK> 2 +<SPACE> 3 +a 4 +b 5 +c 6 +d 7 +e 8 +f 9 +g 10 +h 11 +i 12 +j 13 +k 14 +l 15 +m 16 +n 17 +o 18 +p 19 +q 20 +r 21 +s 22 +t 23 +u 24 +v 25 +w 26 +x 27 +y 28 +z 29 +é 30 +à 31 +ä 32 +ö 33 +ß 34 +ü 35 +- 36 +' 37 |