diff --git a/config.yml b/config.yml index 7ed0a0c410ea6afa5e199e2ca4b67c1af02c0f07..d1c5e4aed8c2336f589841c6fd360bf94d13ef35 100644 --- a/config.yml +++ b/config.yml @@ -40,7 +40,8 @@ Training: #Model selection Model: - Name: RNN + Name: BidirectionalLSTM + #choose in {LinearRegression, BidirectionalLSTM, RNN} #Model parameters selection LinearRegression: @@ -50,12 +51,14 @@ LinearRegression: Initialization: init_he BidirectionalLSTM: - HiddenSize: 70 - NumLayers: 4 + HiddenSize: 16 + NumLayers: 1 + Dropout: 0.2 + NumFFN: 8 Initialization: None RNN: - HiddenSize: 35 + HiddenSize: 32 NumLayers: 2 NumFFN: 2 Dropout: 0.2 diff --git a/create_submission.py b/create_submission.py index 0ecb9b3cd222fb01e30e0db8ae2040dd776ebc19..fba65a17abf6b0441d34249acc73c9399898e985 100644 --- a/create_submission.py +++ b/create_submission.py @@ -20,6 +20,7 @@ import datetime import tqdm import torch import torch.nn as nn +import argparse # Local imports import dataloader @@ -139,6 +140,7 @@ def create_submission(model, transform, device, rootDir, logdir): if __name__ == "__main__": + logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") use_cuda = torch.cuda.is_available() if use_cuda : @@ -171,5 +173,5 @@ if __name__ == "__main__": model.load_state_dict(torch.load(model_path)) - create_submission(model, dataloader.transform_remove_space_time(), device) + create_submission(model, dataloader.transform_remove_space_time(), device, ) #create_submission(model, None, device) \ No newline at end of file diff --git a/logs/BidirectionalLSTM_0/best_model.pt b/logs/BidirectionalLSTM_0/best_model.pt index 1ca9d01cb59f8667ae921dbcc1814116f160c3db..380773185ca73ead68a48aa943915f16ef2d9944 100644 Binary files a/logs/BidirectionalLSTM_0/best_model.pt and b/logs/BidirectionalLSTM_0/best_model.pt differ diff --git a/logs/BidirectionalLSTM_2/best_model.pt b/logs/BidirectionalLSTM_2/best_model.pt deleted file mode 100644 index 7f58d7af9d4ec7b995f4ae91272610037ab3de4d..0000000000000000000000000000000000000000 Binary files a/logs/BidirectionalLSTM_2/best_model.pt and /dev/null differ diff --git a/logs/BidirectionalLSTM_3/best_model.pt b/logs/BidirectionalLSTM_3/best_model.pt deleted file mode 100644 index 97a6b7a3b880eae5bfcd3d1a9ecd0895c9a3e345..0000000000000000000000000000000000000000 Binary files a/logs/BidirectionalLSTM_3/best_model.pt and /dev/null differ diff --git a/logs/main_unit_test.log b/logs/main_unit_test.log index 18b8c71acf0f308d3979643fbb155ea1635e90c8..9af58e2815298b38ee7e44d1066ee02002d808c6 100644 --- a/logs/main_unit_test.log +++ b/logs/main_unit_test.log @@ -1730,3 +1730,131 @@ INFO:root:Generating the index INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2017-testing.nc.bin_index.idx INFO:root:I loaded 112860 values in the test set INFO:root:= Filling in the submission file +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541593 samples +INFO:root: - The valid fold has 135567 samples +INFO:root:Building the dataloader +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 365 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2017-testing.nc.bin_index.idx +INFO:root:I loaded 112860 values in the test set +INFO:root:= Filling in the submission file +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541894 samples +INFO:root: - The valid fold has 135266 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 542036 samples +INFO:root: - The valid fold has 135124 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541952 samples +INFO:root: - The valid fold has 135208 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541584 samples +INFO:root: - The valid fold has 135576 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541804 samples +INFO:root: - The valid fold has 135356 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541569 samples +INFO:root: - The valid fold has 135591 samples +INFO:root:= Dataloaders +INFO:root: - Dataset creation +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Generating the index +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - Loaded a dataset with 677160 samples +INFO:root: - Splitting the data in training and validation sets +INFO:root:Generating the subset files from 677160 samples +INFO:root: - Subset dataset +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points +INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx +INFO:root: - The train fold has 541821 samples +INFO:root: - The valid fold has 135339 samples diff --git a/model.py b/model.py index 6f49f5122804756d8ec6244570e0d22c78cdb935..2ce2607a78d434da5a0da157822d535eff316121 100644 --- a/model.py +++ b/model.py @@ -78,8 +78,29 @@ class BidirectionalLSTM(nn.Module): super(BidirectionalLSTM, self).__init__() self.hidden_size = cfg["BidirectionalLSTM"]["HiddenSize"] self.num_layers = cfg["BidirectionalLSTM"]["NumLayers"] + self.dropout = cfg["BidirectionalLSTM"]["Dropout"] + self.num_ffn = cfg["BidirectionalLSTM"]["NumFFN"] + self.lstm = nn.LSTM(input_size, self.hidden_size, self.num_layers, batch_first = True, bidirectional =True, dropout = 0.2) - self.linear = nn.Linear(2*self.hidden_size, 1) + self.fc = nn.Sequential() + + for layer in range(self.num_ffn): + self.fc.add_module( + f"linear_{layer}", nn.Linear(2*self.hidden_size, 2*self.hidden_size) + ) + self.fc.add_module( + f"relu_{layer}", + nn.ReLU() + ) + self.fc.add_module( + f"dropout_{layer}", + nn.Dropout(p=self.dropout) + ) + self.fc.add_module( + "last_linear", + nn.Linear(2*self.hidden_size, 1) + ) + def forward(self, x): use_cuda = torch.cuda.is_available() if use_cuda : @@ -91,7 +112,7 @@ class BidirectionalLSTM(nn.Module): out, (hidden_state, cell_state) = self.lstm(x, (h0, c0)) - result = self.linear(out) + result = self.fc(out) return result # Initialization diff --git a/train_indices.subset b/train_indices.subset index 908f94d84b96fa83e570be8e0d757dc2a204c211..884531a7aab539a7b5aa7173d72c94be814f4731 100644 Binary files a/train_indices.subset and b/train_indices.subset differ diff --git a/valid_indices.subset b/valid_indices.subset index db33627debbe3962e9335062ed98d346d4ab0f11..7991aa210637e128c860499e5ef347315d26b6fd 100644 Binary files a/valid_indices.subset and b/valid_indices.subset differ diff --git a/wandb/debug-cli.sdi1_3.log b/wandb/debug-cli.sdi1_3.log index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..4ffc2bff8863a799e4ccdbf7206971c17521b81e 100644 --- a/wandb/debug-cli.sdi1_3.log +++ b/wandb/debug-cli.sdi1_3.log @@ -0,0 +1,4 @@ +2023-02-04 00:59:41 ERROR dropped chunk 404 Client Error: Not Found for url: https://api.wandb.ai/files/wherephytoplankton/Kaggle%20phytoplancton/th0741zs/file_stream +NoneType: None +2023-02-04 00:59:41 ERROR dropped chunk 404 Client Error: Not Found for url: https://api.wandb.ai/files/wherephytoplankton/Kaggle%20phytoplancton/th0741zs/file_stream +NoneType: None diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log index 7405266d86a1dca74b8f6b1a13e5172fbea0a14e..41e6ad0e5549b454799d4cf9bf91c8f41254aae5 120000 --- a/wandb/debug-internal.log +++ b/wandb/debug-internal.log @@ -1 +1 @@ -run-20230203_233455-ujner2cx/logs/debug-internal.log \ No newline at end of file +run-20230204_010308-1aksu4p8/logs/debug-internal.log \ No newline at end of file diff --git a/wandb/debug.log b/wandb/debug.log index d2a2374152daf8a98632afd89acf22f36b0e9e4c..5ffe849f735607feda595d9f905dcd19ac498ce8 120000 --- a/wandb/debug.log +++ b/wandb/debug.log @@ -1 +1 @@ -run-20230203_233455-ujner2cx/logs/debug.log \ No newline at end of file +run-20230204_010308-1aksu4p8/logs/debug.log \ No newline at end of file diff --git a/wandb/latest-run b/wandb/latest-run index c0eef7e6f30408ede8d6007a30e95ab9b7701a24..1358adb74bf70d265410a1603dadf86a3c326540 120000 --- a/wandb/latest-run +++ b/wandb/latest-run @@ -1 +1 @@ -run-20230203_233455-ujner2cx \ No newline at end of file +run-20230204_010308-1aksu4p8 \ No newline at end of file