diff options
author | Pherkel | 2023-09-18 12:44:34 +0200 |
---|---|---|
committer | Pherkel | 2023-09-18 12:44:34 +0200 |
commit | 9475900a1085b8277808b0a0b1555c59f7eb6d36 (patch) | |
tree | f9e17b2d15ed959bb8f405e648ce7103c1fe708e /data | |
parent | 0f14789f1c33d55dc270bcd154201cce2c4d516e (diff) |
small fixes
Diffstat (limited to 'data')
-rw-r--r-- | data/tokenizers/tokens_german.txt | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/data/tokenizers/tokens_german.txt b/data/tokenizers/tokens_german.txt new file mode 100644 index 0000000..57f2c3a --- /dev/null +++ b/data/tokenizers/tokens_german.txt @@ -0,0 +1,38 @@ +_ +<BLANK> +<UNK> +<SPACE> +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +é +à +ä +ö +ß +ü +- +' |