diff options
Diffstat (limited to 'config.docker.yaml')
-rw-r--r-- | config.docker.yaml | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/config.docker.yaml b/config.docker.yaml new file mode 100644 index 0000000..51e30f4 --- /dev/null +++ b/config.docker.yaml @@ -0,0 +1,45 @@ +dataset: + download: True + dataset_root_path: "data/datasets" # files will be downloaded into this dir + language_name: "mls_german_opus" + limited_supervision: False # set to True if you want to use limited supervision + dataset_percentage: 1 # percentage of dataset to use (1.0 = 100%) + shuffle: True + +model: + n_cnn_layers: 3 + n_rnn_layers: 7 + rnn_dim: 512 + n_feats: 256 # number of mel features + stride: 2 + dropout: 0.2 # recommended to be around 0.4 for smaller datasets, 0.1 for really large datasets + +tokenizer: + tokenizer_path: "data/tokenizers/char_tokenizer_german.json" + +decoder: + type: "greedy" # greedy, or lm (beam search) + + lm: # config for lm decoder + language_model_path: "data" # path where model and supplementary files are stored + language: "german" + n_gram: 5 # n-gram size of the language model, 3 or 5 + beam_size: 500 + beam_threshold: 150 + n_best: 1 + lm_weight: 1 + word_score: 1 + +training: + learning_rate: 0.0005 + batch_size: 32 # recommended to maximum number that fits on the GPU (batch size of 32 fits on a 12GB GPU) + epochs: 100 + eval_every_n: 5 # evaluate every n epochs + num_workers: 4 # number of workers for dataloader + +checkpoints: # use "~" to disable saving/loading + model_load_path: ~ # path to load model from + model_save_path: "data/runs/01/epoch" # path to save model to + +inference: + model_load_path: "data/epoch67" # path to load model from |