aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config.philipp.yaml12
-rw-r--r--lm_decoder_hparams.ipynb69
2 files changed, 11 insertions, 70 deletions
diff --git a/config.philipp.yaml b/config.philipp.yaml
index 7a93d05..7e51804 100644
--- a/config.philipp.yaml
+++ b/config.philipp.yaml
@@ -18,17 +18,17 @@ tokenizer:
tokenizer_path: "data/tokenizers/char_tokenizer_german.json"
decoder:
- type: "greedy" # greedy, or lm (beam search)
+ type: "lm" # greedy, or lm (beam search)
lm: # config for lm decoder
language_model_path: "data" # path where model and supplementary files are stored
language: "german"
- n_gram: 3 # n-gram size of the language model, 3 or 5
- beam_size: 50
- beam_threshold: 50
+ n_gram: 5 # n-gram size of the language model, 3 or 5
+ beam_size: 500
+ beam_threshold: 150
n_best: 1
- lm_weight: 2
- word_score: 0
+ lm_weight: 5
+ word_score: -1
training:
learning_rate: 0.0005
diff --git a/lm_decoder_hparams.ipynb b/lm_decoder_hparams.ipynb
index 5e56312..23a2958 100644
--- a/lm_decoder_hparams.ipynb
+++ b/lm_decoder_hparams.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -15,20 +15,9 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/folders/lm/1zmdkgm91k912l2vgq978z800000gn/T/ipykernel_80481/3805229751.py:1: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
- " from tqdm.autonotebook import tqdm\n",
- "/Users/philippmerkel/DEV/SWR2-cool-projekt/.venv/lib/python3.10/site-packages/torchaudio/models/decoder/_ctc_decoder.py:62: UserWarning: The built-in flashlight integration is deprecated, and will be removed in future release. Please install flashlight-text. https://pypi.org/project/flashlight-text/ For the detail of CTC decoder migration, please see https://github.com/pytorch/audio/issues/3088.\n",
- " warnings.warn(\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from tqdm.autonotebook import tqdm\n",
"\n",
@@ -45,57 +34,9 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "34aafd9aca2541748dc41d8550334536",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- " 0%| | 0/144 [00:00<?, ?it/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Download flag not set, skipping download\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/philippmerkel/DEV/SWR2-cool-projekt/.venv/lib/python3.10/site-packages/torchaudio/functional/functional.py:576: UserWarning: At least one mel filterbank has all zero values. The value for `n_mels` (128) may be set too high. Or, the value for `n_freqs` (201) may be set too low.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New best WER: 0.8266228565397248 CER: 0.6048691547202959\n",
- "Config: {'language': 'german', 'language_model_path': 'data', 'n_gram': 3, 'beam_size': 25, 'beam_threshold': 10, 'n_best': 1, 'lm_weight': 0, 'word_score': -1.5, 'beam_size_token': 10}\n",
- "LM Weight: 0 Word Score: -1.5 Beam Size: 25 Beam Threshold: 10 Beam Size Token: 10\n",
- "--------------------------------------------------------------\n",
- "New best WER: 0.7900706123452581 CER: 0.49197597466135945\n",
- "Config: {'language': 'german', 'language_model_path': 'data', 'n_gram': 3, 'beam_size': 25, 'beam_threshold': 50, 'n_best': 1, 'lm_weight': 0, 'word_score': -1.5, 'beam_size_token': 10}\n",
- "LM Weight: 0 Word Score: -1.5 Beam Size: 25 Beam Threshold: 50 Beam Size Token: 10\n",
- "--------------------------------------------------------------\n",
- "New best WER: 0.7877685082828738 CER: 0.48660732878914315\n",
- "Config: {'language': 'german', 'language_model_path': 'data', 'n_gram': 3, 'beam_size': 100, 'beam_threshold': 50, 'n_best': 1, 'lm_weight': 0, 'word_score': -1.5, 'beam_size_token': 10}\n",
- "LM Weight: 0 Word Score: -1.5 Beam Size: 100 Beam Threshold: 50 Beam Size Token: 10\n",
- "--------------------------------------------------------------\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"\n",
"\n",