diff --git a/config.yml b/config.yml index 844681d65fd4aab0e45307439852b96867047c5b..1120a1b09bc4ea0dac7c292003e3b4ca204b81df 100644 --- a/config.yml +++ b/config.yml @@ -43,7 +43,7 @@ Optimizer: Adam # in {Adam} #Training parameters Training: - Epochs: 20 + Epochs: 60 #Model selection Model: diff --git a/logs/main_unit_test.log b/logs/main_unit_test.log index 01a62c2969b74dfe484a50687c7890315a366fb8..950ba21aa3f1c26e95b34f1b37fcf47d46e3eb8e 100644 --- a/logs/main_unit_test.log +++ b/logs/main_unit_test.log @@ -2542,3 +2542,33 @@ INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx INFO:root: - The train fold has 541719 samples INFO:root: - The valid fold has 135441 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541712 samples +INFO:root: - The valid fold has 135448 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541661 samples +INFO:root: - The valid fold has 135499 samples diff --git a/main.py b/main.py index 8e568cd3b2276852ab45e8db4c4038c0ea44b1fc..bab93c2e2ba3db40fb129ce9e77836f68fdf6be6 100644 --- a/main.py +++ b/main.py @@ -114,6 +114,14 @@ if __name__ == "__main__": optimizer = optimizer(cfg, network) + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( + optimizer, + 'min', + patience = 5, + threshold = 0.2, + factor = 0.5 + ) + logdir, raw_run_name = utils.create_unique_logpath(rootDir, cfg["Model"]["Name"]) network_checkpoint = model.ModelCheckpoint(logdir + "/best_model.pt", network) @@ -121,14 +129,16 @@ if __name__ == "__main__": wandb.run.name = raw_run_name wandb.watch(network, log_freq = log_freq) - torch.autograd.set_detect_anomaly(True) - + #torch.autograd.set_detect_anomaly(True) + for t in range(cfg["Training"]["Epochs"]): print("Epoch {}".format(t)) train(args, network, train_loader, f_loss, optimizer, device, log_interval) val_loss = test.test(network, valid_loader, f_loss, device) + scheduler.step(val_loss) + network_checkpoint.update(val_loss) print(" Validation : Loss : {:.4f}".format(val_loss)) diff --git a/train.py b/train.py index 8f98f5d71fb2910067921efa27c8d26ba2c2cccc..778b6c7e356fa2152a9edc82e92b40946d401639 100644 --- a/train.py +++ b/train.py @@ -39,7 +39,6 @@ def train(args, model, loader, f_loss, optimizer, device, log_interval = 100): optimizer.zero_grad() loss.backward() - #torch.nn.utils.clip_grad_norm(model.parameters(), 50) Y = list(model.parameters())[0].grad.cpu().tolist() diff --git a/train_indices.subset b/train_indices.subset index 6b03f35655aa0ac4dd73d9dbe8d7962d868b9e0a..50424bf1058e47db24ac20b9f5063a2f58757f59 100644 Binary files a/train_indices.subset and b/train_indices.subset differ diff --git a/valid_indices.subset b/valid_indices.subset index b6bc2b9147d2278d5d3897fa11f4e85722d72e6c..e6a98afb07d8cac740590901c0f7627a17c95064 100644 Binary files a/valid_indices.subset and b/valid_indices.subset differ