aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pherkel, 2023-09-18 18:29:37 +0200
committer: Pherkel, 2023-09-18 18:29:37 +0200
commit: f6e6377d90aa4771bdae7c73364d7fe3ae6bf507 (patch)
tree: 4907486cd22ea9f2edc02b7f550f5906d26d800a
parent: f94506764bde3e4d41dc593e9d11aa7330c00e30 (diff)
small lm fixes
-rw-r--r-- swr2_asr/utils/data.py 2
-rw-r--r-- swr2_asr/utils/decoder.py 8
2 files changed, 6 insertions, 4 deletions
diff --git a/swr2_asr/utils/data.py b/swr2_asr/utils/data.py
index 74cd572..bb3233e 100644
--- a/swr2_asr/utils/data.py
+++ b/swr2_asr/utils/data.py
@@ -360,4 +360,4 @@ def create_lexicon(vocab_counts_path, lexicon_path):
file.write(f"{word} ")
for char in word:
file.write(char + " ")
- file.write("<SPACE>")
+ file.write("<SPACE>\n")
diff --git a/swr2_asr/utils/decoder.py b/swr2_asr/utils/decoder.py
index 1fd002a..38f218f 100644
--- a/swr2_asr/utils/decoder.py
+++ b/swr2_asr/utils/decoder.py
@@ -52,9 +52,11 @@ def get_beam_search_decoder(
)
if not os.path.isdir(os.path.join(lang_model_path, f"mls_lm_{language}")):
- url = f"https://dl.fbaipublicfiles.com/mls/mls_lm_{language}.tar.gz"
- torch.hub.download_url_to_file(url, f"data/mls_lm_{language}.tar.gz")
- _extract_tar("data/mls_lm_{language}.tar.gz", overwrite=True)
+ # check if zip file exists
+ if not os.path.isfile(f"data/mls_lm_{language}.tar.gz"):
+ url = f"https://dl.fbaipublicfiles.com/mls/mls_lm_{language}.tar.gz"
+ torch.hub.download_url_to_file(url, f"data/mls_lm_{language}.tar.gz")
+ _extract_tar(f"data/mls_lm_{language}.tar.gz", overwrite=True)
tokens_path = os.path.join(lang_model_path, f"mls_lm_{language}", "tokens.txt")
if not os.path.isfile(tokens_path):