From cd15a49ccee83c21ada481d6815d004f134147fe Mon Sep 17 00:00:00 2001
From: Philipp Merkel
Date: Mon, 4 Sep 2023 14:07:54 +0000
Subject: applied fixes to download and tokenizers

---
 swr2_asr/train.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'swr2_asr/train.py')

diff --git a/swr2_asr/train.py b/swr2_asr/train.py
index 8fc0b78..aea99e0 100644
--- a/swr2_asr/train.py
+++ b/swr2_asr/train.py
@@ -173,7 +173,6 @@ def run(
             split="all",
             download=False,
             out_path="data/tokenizers/char_tokenizer_german.json",
-            vocab_size=3000,
         )
 
     tokenizer = CharTokenizer.from_file("data/tokenizers/char_tokenizer_german.json")
@@ -305,4 +304,4 @@ def run_cli(
 
 
 if __name__ == "__main__":
-    run(1e-3, 10, 1, False, "", "/Volumes/pherkel/SWR2-ASR", "mls_german_opus")
+    run_cli()
-- 
cgit v1.2.3