aboutsummaryrefslogtreecommitdiff
path: root/config.philipp.yaml
diff options
context:
space:
mode:
author Pherkel 2023-09-11 21:52:42 +0200
committer Pherkel 2023-09-11 21:52:42 +0200
commit 58b30927bd870604a4077a8af9ec3cad7b0be21c (patch)
tree 7dd492fa8f14ff61c88545448972022ead324c31 /config.philipp.yaml
parent 9ca17d8a83369257f4cc42c963e25baf35a28f8f (diff)
changed config to yaml!
Diffstat (limited to 'config.philipp.yaml')
-rw-r--r--config.philipp.yaml29
1 files changed, 29 insertions, 0 deletions
diff --git a/config.philipp.yaml b/config.philipp.yaml
new file mode 100644
index 0000000..638b5ef
--- /dev/null
+++ b/config.philipp.yaml
@@ -0,0 +1,29 @@
+model:
+ n_cnn_layers: 3
+ n_rnn_layers: 5
+ rnn_dim: 512
+ n_feats: 128 # number of mel features
+ stride: 2
+ dropout: 0.25 # recommended to be around 0.4 for smaller datasets, 0.1 for really large datasets
+
+training:
+ learning_rate: 0.0005
+ batch_size: 2 # recommended to be the maximum number that fits on the GPU (a batch size of 32 fits on a 12GB GPU)
+ epochs: 3
+ eval_every_n: 1 # evaluate every n epochs
+ num_workers: 4 # number of workers for dataloader
+
+dataset:
+ download: True
+ dataset_root_path: "/Volumes/pherkel 1/SWR2-ASR" # files will be downloaded into this dir
+ language_name: "mls_german_opus"
+ limited_supervision: True # set to True if you want to use limited supervision
+ dataset_percentage: 0.01 # fraction of the dataset to use (1.0 = 100%, so 0.01 = 1%)
+ shuffle: True
+
+tokenizer:
+ tokenizer_path: "data/tokenizers/char_tokenizer_german.json"
+
+checkpoints:
+ model_load_path: ~ # path to load model from
+ model_save_path: ~ # path to save model to
\ No newline at end of file