diff options
-rw-r--r-- | config.philipp.yaml | 12 | ||||
-rw-r--r-- | lm_decoder_hparams.ipynb | 69 |
2 files changed, 11 insertions, 70 deletions
diff --git a/config.philipp.yaml b/config.philipp.yaml index 7a93d05..7e51804 100644 --- a/config.philipp.yaml +++ b/config.philipp.yaml @@ -18,17 +18,17 @@ tokenizer: tokenizer_path: "data/tokenizers/char_tokenizer_german.json" decoder: - type: "greedy" # greedy, or lm (beam search) + type: "lm" # greedy, or lm (beam search) lm: # config for lm decoder language_model_path: "data" # path where model and supplementary files are stored language: "german" - n_gram: 3 # n-gram size of the language model, 3 or 5 - beam_size: 50 - beam_threshold: 50 + n_gram: 5 # n-gram size of the language model, 3 or 5 + beam_size: 500 + beam_threshold: 150 n_best: 1 - lm_weight: 2 - word_score: 0 + lm_weight: 5 + word_score: -1 training: learning_rate: 0.0005 diff --git a/lm_decoder_hparams.ipynb b/lm_decoder_hparams.ipynb index 5e56312..23a2958 100644 --- a/lm_decoder_hparams.ipynb +++ b/lm_decoder_hparams.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -15,20 +15,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/lm/1zmdkgm91k912l2vgq978z800000gn/T/ipykernel_80481/3805229751.py:1: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", - " from tqdm.autonotebook import tqdm\n", - "/Users/philippmerkel/DEV/SWR2-cool-projekt/.venv/lib/python3.10/site-packages/torchaudio/models/decoder/_ctc_decoder.py:62: UserWarning: The built-in flashlight integration is deprecated, and will be removed in future release. Please install flashlight-text. https://pypi.org/project/flashlight-text/ For the detail of CTC decoder migration, please see https://github.com/pytorch/audio/issues/3088.\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "from tqdm.autonotebook import tqdm\n", "\n", @@ -45,57 +34,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "34aafd9aca2541748dc41d8550334536", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/144 [00:00<?, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Download flag not set, skipping download\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/philippmerkel/DEV/SWR2-cool-projekt/.venv/lib/python3.10/site-packages/torchaudio/functional/functional.py:576: UserWarning: At least one mel filterbank has all zero values. The value for `n_mels` (128) may be set too high. Or, the value for `n_freqs` (201) may be set too low.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "New best WER: 0.8266228565397248 CER: 0.6048691547202959\n", - "Config: {'language': 'german', 'language_model_path': 'data', 'n_gram': 3, 'beam_size': 25, 'beam_threshold': 10, 'n_best': 1, 'lm_weight': 0, 'word_score': -1.5, 'beam_size_token': 10}\n", - "LM Weight: 0 Word Score: -1.5 Beam Size: 25 Beam Threshold: 10 Beam Size Token: 10\n", - "--------------------------------------------------------------\n", - "New best WER: 0.7900706123452581 CER: 0.49197597466135945\n", - "Config: {'language': 'german', 'language_model_path': 'data', 'n_gram': 3, 'beam_size': 25, 'beam_threshold': 50, 'n_best': 1, 'lm_weight': 0, 'word_score': -1.5, 'beam_size_token': 10}\n", - "LM Weight: 0 Word Score: -1.5 Beam Size: 25 Beam Threshold: 50 Beam Size Token: 10\n", - "--------------------------------------------------------------\n", - "New best WER: 0.7877685082828738 CER: 0.48660732878914315\n", - "Config: {'language': 'german', 'language_model_path': 'data', 'n_gram': 3, 'beam_size': 100, 'beam_threshold': 50, 'n_best': 1, 'lm_weight': 0, 'word_score': -1.5, 'beam_size_token': 10}\n", - "LM Weight: 0 Word Score: -1.5 Beam Size: 100 Beam Threshold: 50 Beam Size Token: 10\n", - "--------------------------------------------------------------\n" - ] - } - ], + "outputs": [], "source": [ "\n", "\n", |