From fd3106c2cce565d378def73b0d77b0123f68523b Mon Sep 17 00:00:00 2001 From: JoJoBarthold2 Date: Sat, 19 Aug 2023 13:26:01 +0200 Subject: train now returns loss so it can be saved ( amen ) --- swr2_asr/train.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'swr2_asr') diff --git a/swr2_asr/train.py b/swr2_asr/train.py index 81312d9..6eaf4c1 100644 --- a/swr2_asr/train.py +++ b/swr2_asr/train.py @@ -351,7 +351,9 @@ def train( [{batch_idx * len(spectrograms)}/{data_len} \ ({100.0 * batch_idx / len(train_loader)}%)]\t \ Loss: {loss.item()}" + ) + return loss def test(model, device, test_loader, criterion): @@ -460,7 +462,7 @@ def run(learning_rate: float = 5e-4, batch_size: int = 8, epochs: int = 3) -> No iter_meter = IterMeter() for epoch in range(1, epochs + 1): - train( + loss = train( model, device, train_loader, -- cgit v1.2.3 From d5568bb9f51c4b586c7bd8537140cb1e201f5840 Mon Sep 17 00:00:00 2001 From: JoJoBarthold2 Date: Sat, 19 Aug 2023 13:29:20 +0200 Subject: also now saves loss ( hahah funny meme) | || || |_ --- swr2_asr/train.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'swr2_asr') diff --git a/swr2_asr/train.py b/swr2_asr/train.py index 6eaf4c1..9a8620f 100644 --- a/swr2_asr/train.py +++ b/swr2_asr/train.py @@ -351,10 +351,8 @@ def train( [{batch_idx * len(spectrograms)}/{data_len} \ ({100.0 * batch_idx / len(train_loader)}%)]\t \ Loss: {loss.item()}" - ) - return loss - + return loss.item() def test(model, device, test_loader, criterion): """Test""" @@ -476,7 +474,7 @@ def run(learning_rate: float = 5e-4, batch_size: int = 8, epochs: int = 3) -> No torch.save({ 'epoch': epoch, 'model_state_dict': model.state_dict(), - },MODEL_SAVE_PATH) + 'loss': loss},MODEL_SAVE_PATH) test(model=model, device=device, test_loader=test_loader, criterion=criterion) -- cgit v1.2.3 From 631ed7a3f7230cb61023875f3a0945542f6e97a9 Mon Sep 17 00:00:00 2001 From: JoJoBarthold2 Date: Sat, 19 Aug 2023 13:31:28 +0200 Subject: fix --- swr2_asr/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'swr2_asr') diff --git a/swr2_asr/train.py b/swr2_asr/train.py index 9a8620f..2e72dee 100644 --- a/swr2_asr/train.py +++ b/swr2_asr/train.py @@ -352,7 +352,7 @@ def train( ({100.0 * batch_idx / len(train_loader)}%)]\t \ Loss: {loss.item()}" ) - return loss.item() + return loss.item() def test(model, device, test_loader, criterion): """Test""" -- cgit v1.2.3 From aea161ee7f2c96aab529ca22675fb54cdcadbd12 Mon Sep 17 00:00:00 2001 From: JoJoBarthold2 Date: Sat, 19 Aug 2023 14:13:57 +0200 Subject: loading now works --- swr2_asr/train.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'swr2_asr') diff --git a/swr2_asr/train.py b/swr2_asr/train.py index 2e72dee..346be0b 100644 --- a/swr2_asr/train.py +++ b/swr2_asr/train.py @@ -9,8 +9,8 @@ from torch.utils.data import DataLoader import torchaudio from .loss_scores import cer, wer -MODEL_SAVE_PATH = "models/model.pt" -LOSS + + class TextTransform: """Maps characters to integers and vice versa""" @@ -388,7 +388,7 @@ def test(model, device, test_loader, criterion): ) -def run(learning_rate: float = 5e-4, batch_size: int = 8, epochs: int = 3) -> None: +def run(learning_rate: float = 5e-4, batch_size: int = 8, epochs: int = 3,load: bool=False, path: str="models/model.pt") -> None: """Runs the training script.""" hparams = { "n_cnn_layers": 3, @@ -446,10 +446,14 @@ def run(learning_rate: float = 5e-4, batch_size: int = 8, epochs: int = 3) -> No print( "Num Model Parameters", sum([param.nelement() for param in model.parameters()]) ) - optimizer = optim.AdamW(model.parameters(), hparams["learning_rate"]) criterion = nn.CTCLoss(blank=28).to(device) - + if load: + checkpoint = torch.load(path) + model.load_state_dict(checkpoint['model_state_dict']) + optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + epoch = checkpoint['epoch'] + loss = checkpoint['loss'] scheduler = optim.lr_scheduler.OneCycleLR( optimizer, max_lr=hparams["learning_rate"], @@ -474,7 +478,7 @@ def run(learning_rate: float = 5e-4, batch_size: int = 8, epochs: int = 3) -> No torch.save({ 'epoch': epoch, 'model_state_dict': model.state_dict(), - 'loss': loss},MODEL_SAVE_PATH) + 'loss': loss},path) test(model=model, device=device, test_loader=test_loader, criterion=criterion) @@ -482,10 +486,13 @@ def run(learning_rate: float = 5e-4, batch_size: int = 8, epochs: int = 3) -> No @click.option("--learning-rate", default=1e-3, help="Learning rate") @click.option("--batch_size", default=1, help="Batch size") @click.option("--epochs", default=1, help="Number of epochs") -def run_cli(learning_rate: float, batch_size: int, epochs: int) -> None: +@click.option("--load", default = False, help="Do you want to load a model?") +@click.option("--path",default="models/model.pt", + help= "Path where the model will be saved to/loaded from" ) +def run_cli(learning_rate: float, batch_size: int, epochs: int, load:bool,path:str) -> None: """Runs the training script.""" - run(learning_rate=learning_rate, batch_size=batch_size, epochs=epochs) + run(learning_rate=learning_rate, batch_size=batch_size, epochs=epochs,load= load, path = path) if __name__ == "__main__": - run(learning_rate=5e-4, batch_size=16, epochs=1) + run(learning_rate=5e-4, batch_size=16, epochs=1,load=False, path= "models/model.pt") -- cgit v1.2.3