aboutsummaryrefslogtreecommitdiff
path: root/config.philipp.yaml
diff options
context:
space:
mode:
author Pherkel 2023-09-18 14:25:36 +0200
committer Pherkel 2023-09-18 14:25:36 +0200
commit d5e482b7dc3d8b6acc48a883ae9b53b354fa1715 (patch)
tree 580f0ab45784664978d8f24c4831f3eec1bceb2e /config.philipp.yaml
parent d5689047fa7062b284d13271bda39013dcf6150f (diff)
decoder changes
Diffstat (limited to 'config.philipp.yaml')
-rw-r--r-- config.philipp.yaml 55
1 files changed, 33 insertions, 22 deletions
diff --git a/config.philipp.yaml b/config.philipp.yaml
index f72ce2e..38a68f8 100644
--- a/config.philipp.yaml
+++ b/config.philipp.yaml
@@ -1,34 +1,45 @@
+dataset:
+ download: True
+ dataset_root_path: "/Volumes/pherkel 2/SWR2-ASR" # files will be downloaded into this dir
+ language_name: "mls_german_opus"
+ limited_supervision: True # set to True if you want to use limited supervision
+ dataset_percentage: 0.15 # fraction of the dataset to use (1.0 = 100%)
+ shuffle: True
+
model:
n_cnn_layers: 3
n_rnn_layers: 5
rnn_dim: 512
n_feats: 128 # number of mel features
stride: 2
- dropout: 0.2 # recommended to be around 0.4-0.6 for smaller datasets, 0.1 for really large datasets
-
-training:
- learning_rate: 0.0005
- batch_size: 32 # recommended to maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU)
- epochs: 150
- eval_every_n: 5 # evaluate every n epochs
- num_workers: 4 # number of workers for dataloader
- device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
-
-dataset:
- download: true
- dataset_root_path: "data" # files will be downloaded into this dir
- language_name: "mls_german_opus"
- limited_supervision: false # set to True if you want to use limited supervision
- dataset_percentage: 1 # percentage of dataset to use (1.0 = 100%)
- shuffle: true
+ dropout: 0.6 # recommended to be around 0.4 for smaller datasets, 0.1 for really large datasets
tokenizer:
tokenizer_path: "data/tokenizers/char_tokenizer_german.json"
-checkpoints:
- model_load_path: "data/runs/epoch31" # path to load model from
- model_save_path: "data/runs/epoch" # path to save model to
+decoder:
+ type: "lm" # "greedy" or "lm" (beam search)
+
+ lm: # config for lm decoder
+ language_model_path: "data" # path where model and supplementary files are stored
+ language: "german"
+ n_gram: 3 # n-gram size of the language model, 3 or 5
+ beam_size: 50
+ beam_threshold: 50
+ n_best: 1
+ lm_weight: 2
+ word_score: 0
+
+training:
+ learning_rate: 0.0005
+ batch_size: 8 # recommended: the largest batch size that fits on the GPU (a batch size of 32 fits on a 12GB GPU)
+ epochs: 3
+ eval_every_n: 3 # evaluate every n epochs
+ num_workers: 8 # number of workers for dataloader
+
+checkpoints: # use "~" to disable saving/loading
+ model_load_path: "YOUR/PATH" # path to load model from
+ model_save_path: "YOUR/PATH" # path to save model to
inference:
- model_load_path: "data/runs/epoch30" # path to load model from
- device: "cuda" # device to run inference on if gpu is available, else "cpu" will be set automatically
\ No newline at end of file
+ model_load_path: "data/epoch67" # path to load model from
\ No newline at end of file