diff --git a/config.yml b/config.yml index 783ce55dd53d9d48f5d4781e62a6feb61e4aa60e..dce0a3ccb616b311177376a3e720d802b43ddc2f 100644 --- a/config.yml +++ b/config.yml @@ -1,7 +1,7 @@ # Dataset Configuration Dataset: num_days: 73 # Number of days in each sequence - should be the same as in Test - - batch_size: 64 + batch_size: 128 num_workers: 7 valid_ratio: 0.2 max_num_samples: None #1000 @@ -61,7 +61,7 @@ BidirectionalLSTM: HiddenSize: 16 NumLayers: 2 LSTMDropout: 0 - FFNDropout: 0.2 + FFNDropout: 0 NumFFN: 3 Initialization: init_he diff --git a/logs/main_unit_test.log b/logs/main_unit_test.log index 38daef58f7cd0d49949d994431bc7b5dea593041..8f016a55f0006b877b3e43fb14e63892d1404617 100644 --- a/logs/main_unit_test.log +++ b/logs/main_unit_test.log @@ -2573,3 +2573,78 @@ INFO:root:Generating the index INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2017-testing.nc.bin_index.idx INFO:root:I loaded 112860 values in the test set INFO:root:= Filling in the submission file +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541958 samples +INFO:root: - The valid fold has 135202 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 542123 samples +INFO:root: - The valid fold has 135037 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 542201 samples +INFO:root: - The valid fold has 134959 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541381 samples +INFO:root: - The valid fold has 135779 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541940 samples +INFO:root: - The valid fold has 135220 samples diff --git a/main.py b/main.py index d0d75d933c6822113fe926713583097fa8770a71..f9ca5f6baed6992bde691d512f6ebb3fa004f05a 100644 --- a/main.py +++ b/main.py @@ -1,9 +1,8 @@ #Internal imports import dataloader import model -import test +import my_test import my_train -import create_submission import utils #External imports @@ -16,7 +15,7 @@ import torch.nn as nn import os import argparse -def optimizer(cfg, network): +def choose_optimizer(cfg, network): result = {"Adam" : torch.optim.Adam(network.parameters())} return result[cfg["Optimizer"]] @@ -91,7 +90,7 @@ def train(args, cfg): f_loss = model.RMSLELoss() - optimizer = optimizer(cfg, network) + optimizer = choose_optimizer(cfg, network) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, @@ -113,10 +112,10 @@ def train(args, cfg): best_val_loss = None for t in range(cfg["Training"]["Epochs"]): - print("Epoch {}".format(t)) + print(f"Epoch {t+1}") my_train.train(args, network, train_loader, f_loss, optimizer, device, log_interval) - val_loss = test.test(network, valid_loader, f_loss, device) + val_loss = my_test.test(network, valid_loader, f_loss, device) if best_val_loss != None: if val_loss < best_val_loss : @@ -126,7 +125,7 @@ def train(args, cfg): scheduler.step(val_loss) - print(" Validation : Loss : {:.4f}".format(val_loss)) + print("Validation : Loss : {:.4f}".format(val_loss)) if not args.no_wandb: wandb.log({"val_loss": val_loss}) @@ -155,7 +154,7 @@ def test(args): network.load_state_dict(torch.load(model_path)) - create_submission.create_submission(args, network, eval(dataset_transform), device, rootDir, logdir) + utils.create_submission(args, network, eval(dataset_transform), device, rootDir, logdir) logging.info(f"The submission csv file has been created in the folder : {logdir}") @@ -209,5 +208,5 @@ if __name__ == "__main__": config_file = open("config.yml") cfg = yaml.load(config_file, Loader=yaml.FullLoader) - eval(f"{args.command}(args)") + eval(f"{args.command}(args, cfg)") diff --git a/model.py b/model.py index 2fd3b44e67a4f06f284337c37b15f3caace918c2..313f58032016c1ef2ca8f6c0d1e471329d1c2ec1 100644 --- a/model.py +++ b/model.py @@ -80,7 +80,14 @@ class BidirectionalLSTM(nn.Module): self.FFN_dropout = cfg["BidirectionalLSTM"]["FFNDropout"] self.num_ffn = cfg["BidirectionalLSTM"]["NumFFN"] - self.lstm = nn.LSTM(input_size, self.hidden_size, self.num_layers, batch_first = True, bidirectional =True, dropout = self.LSTM_dropout) + self.lstm = nn.LSTM( + input_size, + self.hidden_size, + self.num_layers, + batch_first = True, + bidirectional =True, + dropout = self.LSTM_dropout) + self.fc = nn.Sequential() for layer in range(self.num_ffn): @@ -105,7 +112,8 @@ class BidirectionalLSTM(nn.Module): if use_cuda : device = torch.device('cuda') else : - device = toch.device('cpu') + device = torch.device('cpu') + h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device) c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device) @@ -114,6 +122,7 @@ class BidirectionalLSTM(nn.Module): result = self.fc(out) result = nn.ReLU()(result) + return result @@ -122,7 +131,7 @@ class CNN1D(torch.nn.Module): def __init__(self, cfg, num_inputs): super(CNN1D, self).__init__() - self.model = torch.nn.Sequential( + self.block = torch.nn.Sequential( *conv_block(num_inputs, 32), *conv_block(32, 128) ) @@ -137,9 +146,8 @@ class CNN1D(torch.nn.Module): def forward(self, x): x = torch.transpose(x, 1, 2) - out = self.model(x) + out = self.block(x) - print(f"This is after CNN : {out}") out = self.avg_pool(out) out = out.view([out.shape[0], -1]) diff --git a/test.py b/my_test.py similarity index 100% rename from test.py rename to my_test.py diff --git a/train_indices.subset b/train_indices.subset index 50424bf1058e47db24ac20b9f5063a2f58757f59..7c4d7fd4d5a7a65aaa34f8a80ddc63f15a8ec829 100644 Binary files a/train_indices.subset and b/train_indices.subset differ diff --git a/utils.py b/utils.py index 38b9089d4b31f8be656ad2bf4f28d94e2fe13428..b4293545ee08b7692043b2342dbd9684eaac345a 100644 --- a/utils.py +++ b/utils.py @@ -1,5 +1,15 @@ -import os +# Standard imports import sys +import logging +import os +import datetime + +# External imports +import tqdm +import torch +import torch.nn as nn +import argparse +import yaml def generate_unique_logpath(logdir, raw_run_name): i = 0 @@ -46,4 +56,6 @@ def write_summary(logdir, model, optimizer, val_loss): """.format(val_loss," ".join(sys.argv), model, sum(p.numel() for p in model.parameters() if p.requires_grad), optimizer) summary_file.write(summary_text) - summary_file.close() \ No newline at end of file + summary_file.close() + + diff --git a/valid_indices.subset b/valid_indices.subset index e6a98afb07d8cac740590901c0f7627a17c95064..1bd7deacda9183635879207df14248df38fbffa1 100644 Binary files a/valid_indices.subset and b/valid_indices.subset differ