From 8be140b38183b7465b5888a15b536a5f7fa66db6 Mon Sep 17 00:00:00 2001 From: Pherkel Date: Mon, 11 Sep 2023 20:45:32 +0200 Subject: added tokenizer to git and tokenizer training routing --- data/tokenizers/char_tokenizer_german.json | 38 ++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 data/tokenizers/char_tokenizer_german.json (limited to 'data') diff --git a/data/tokenizers/char_tokenizer_german.json b/data/tokenizers/char_tokenizer_german.json new file mode 100644 index 0000000..20db079 --- /dev/null +++ b/data/tokenizers/char_tokenizer_german.json @@ -0,0 +1,38 @@ +_ 0 + 1 + 2 + 3 +a 4 +b 5 +c 6 +d 7 +e 8 +f 9 +g 10 +h 11 +i 12 +j 13 +k 14 +l 15 +m 16 +n 17 +o 18 +p 19 +q 20 +r 21 +s 22 +t 23 +u 24 +v 25 +w 26 +x 27 +y 28 +z 29 +é 30 +à 31 +ä 32 +ö 33 +ß 34 +ü 35 +- 36 +' 37 -- cgit v1.2.3 From 0e457b1a553ca2d9f4e2449c620925c634a82efd Mon Sep 17 00:00:00 2001 From: Pherkel Date: Mon, 11 Sep 2023 20:46:08 +0200 Subject: moved own recording to data dir --- .gitignore | 2 +- "Silja_erkl\303\244rt_die_welt.flac" | Bin 8211720 -> 0 bytes "data/own/Silja_erkl\303\244rt_die_welt.flac" | Bin 0 -> 8211720 bytes data/own/marvin_rede.flac | Bin 0 -> 3976026 bytes data/own/valentins_diss_gegen_marvin.flac | Bin 0 -> 706852 bytes marvin_rede.flac | Bin 3976026 -> 0 bytes valentins_diss_gegen_marvin.flac | Bin 706852 -> 0 bytes 7 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 "Silja_erkl\303\244rt_die_welt.flac" create mode 100644 "data/own/Silja_erkl\303\244rt_die_welt.flac" create mode 100644 data/own/marvin_rede.flac create mode 100644 data/own/valentins_diss_gegen_marvin.flac delete mode 100644 marvin_rede.flac delete mode 100644 valentins_diss_gegen_marvin.flac (limited to 'data') diff --git a/.gitignore b/.gitignore index d21ddb6..57026ea 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ # Training files data/* !data/tokenizers - +!data/own # Mac **/.DS_Store diff --git "a/Silja_erkl\303\244rt_die_welt.flac" "b/Silja_erkl\303\244rt_die_welt.flac" deleted file mode 100644 index da51c0c..0000000 Binary files "a/Silja_erkl\303\244rt_die_welt.flac" and /dev/null differ diff --git "a/data/own/Silja_erkl\303\244rt_die_welt.flac" "b/data/own/Silja_erkl\303\244rt_die_welt.flac" new file mode 100644 index 0000000..da51c0c Binary files /dev/null and "b/data/own/Silja_erkl\303\244rt_die_welt.flac" differ diff --git a/data/own/marvin_rede.flac b/data/own/marvin_rede.flac new file mode 100644 index 0000000..43d171d Binary files /dev/null and b/data/own/marvin_rede.flac differ diff --git a/data/own/valentins_diss_gegen_marvin.flac b/data/own/valentins_diss_gegen_marvin.flac new file mode 100644 index 0000000..9c15644 Binary files /dev/null and b/data/own/valentins_diss_gegen_marvin.flac differ diff --git a/marvin_rede.flac b/marvin_rede.flac deleted file mode 100644 index 43d171d..0000000 Binary files a/marvin_rede.flac and /dev/null differ diff --git a/valentins_diss_gegen_marvin.flac b/valentins_diss_gegen_marvin.flac deleted file mode 100644 index 9c15644..0000000 Binary files a/valentins_diss_gegen_marvin.flac and /dev/null differ -- cgit v1.2.3