aboutsummaryrefslogtreecommitdiff
path: root/swr2_asr
diff options
context:
space:
mode:
Diffstat (limited to 'swr2_asr')
-rw-r--r--swr2_asr/train.py4
-rw-r--r--swr2_asr/utils/data.py7
2 files changed, 6 insertions, 5 deletions
diff --git a/swr2_asr/train.py b/swr2_asr/train.py
index eb79ee2..ca70d21 100644
--- a/swr2_asr/train.py
+++ b/swr2_asr/train.py
@@ -223,8 +223,8 @@ def main(config_path: str):
)
tokenizer = CharTokenizer.from_file(tokenizer_config["tokenizer_path"])
- train_data_processing = DataProcessing("train", tokenizer)
- valid_data_processing = DataProcessing("valid", tokenizer)
+ train_data_processing = DataProcessing("train", tokenizer, {"n_feats": model_config["n_feats"]})
+ valid_data_processing = DataProcessing("valid", tokenizer, {"n_feats": model_config["n_feats"]})
train_loader = DataLoader(
dataset=train_dataset,
diff --git a/swr2_asr/utils/data.py b/swr2_asr/utils/data.py
index 0e06eec..10f0ea8 100644
--- a/swr2_asr/utils/data.py
+++ b/swr2_asr/utils/data.py
@@ -15,18 +15,19 @@ from swr2_asr.utils.tokenizer import CharTokenizer
class DataProcessing:
"""Data processing class for the dataloader"""
- def __init__(self, data_type: str, tokenizer: CharTokenizer):
+ def __init__(self, data_type: str, tokenizer: CharTokenizer, hparams: dict):
self.data_type = data_type
self.tokenizer = tokenizer
+ n_features = hparams["n_feats"]
if data_type == "train":
self.audio_transform = torch.nn.Sequential(
- torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_mels=128),
+ torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_mels=n_features),
torchaudio.transforms.FrequencyMasking(freq_mask_param=30),
torchaudio.transforms.TimeMasking(time_mask_param=100),
)
elif data_type == "valid":
- self.audio_transform = torchaudio.transforms.MelSpectrogram()
+ self.audio_transform = torchaudio.transforms.MelSpectrogram(n_mels=n_features)
def __call__(self, data) -> tuple[Tensor, Tensor, list, list]:
spectrograms = []