# Configuration for dataset download, model architecture, tokenizer, decoder,
# training, checkpointing and inference.
#
# NOTE(review): the block structure below was reconstructed from a
# whitespace-collapsed source; confirm the nesting (in particular that `lm`
# belongs under `decoder`) against the code that consumes this file.

dataset:
  download: true
  dataset_root_path: "YOUR/PATH"  # files will be downloaded into this dir
  language_name: "mls_german_opus"
  limited_supervision: false  # set to true to use limited supervision
  dataset_percentage: 1.0  # fraction of the dataset to use (1.0 = 100%)
  shuffle: true

model:
  n_cnn_layers: 3
  n_rnn_layers: 5
  rnn_dim: 512
  n_feats: 128  # number of mel features
  stride: 2
  # recommended: around 0.4 for smaller datasets, 0.1 for really large datasets
  dropout: 0.3

tokenizer:
  tokenizer_path: "data/tokenizers/char_tokenizer_german.json"

decoder:
  type: "greedy"  # "greedy", or "lm" (beam search)

  lm:  # config for the lm decoder
    # path where the model and supplementary files are stored
    language_model_path: "data"
    language: "german"
    n_gram: 3  # n-gram size of the language model, 3 or 5
    beam_size: 50
    beam_threshold: 50
    n_best: 1
    # Plain numbers: a trailing comma here (e.g. `2,`) would make YAML load
    # the value as the string "2," instead of a number.
    lm_weight: 2
    word_score: 0

training:
  learning_rate: 0.0005
  # recommended: the largest batch size that fits on the GPU
  # (a batch size of 32 fits on a 12GB GPU)
  batch_size: 8
  epochs: 3
  eval_every_n: 3  # evaluate every n epochs
  num_workers: 8  # number of workers for the dataloader

checkpoints:  # use "~" to disable saving/loading
  model_load_path: "YOUR/PATH"  # path to load model from
  model_save_path: "YOUR/PATH"  # path to save model to

inference:
  model_load_path: "YOUR/PATH"  # path to load model from