From a7f7fa3a3847bdc4970b6ceecee976253807b2b8 Mon Sep 17 00:00:00 2001
From: Yandi <yandirzm@gmail.com>
Date: Sun, 22 Jan 2023 14:54:26 +0100
Subject: [PATCH] [Submitting] Add model checkpointing, per-run log directories and device handling for submission

---
 config.yml           |  3 ++-
 create_submission.py | 50 +++++++++++++++++++++++++++++++------
 dataloader.py        |  4 ++-
 main.py              | 59 ++++++++++++++++++++++++++++++--------------
 model.py             | 11 +++++++++
 submission.csv       |  1 +
 utils.py             |  6 +++++
 7 files changed, 106 insertions(+), 28 deletions(-)
 create mode 100644 submission.csv

diff --git a/config.yml b/config.yml
index b448853..8537f70 100644
--- a/config.yml
+++ b/config.yml
@@ -28,6 +28,7 @@ LinearRegression:
   # Bias in {True, False}
   Bias: True
 
-
+# Name of the directory containing logs
+LogDir: ./logs/
 
 
diff --git a/create_submission.py b/create_submission.py
index 45aebaa..126caf8 100644
--- a/create_submission.py
+++ b/create_submission.py
@@ -19,11 +19,19 @@ import datetime
 # External imports
 import tqdm
 import torch
+import torch.nn as nn
 
 # Local imports
 import bindataset as dataset
+import dataloader
 
-def create_submission(model, transform):
+def dummy_model(X):
+    # X is a (B, T, N) tensor
+    # As a dummy model, we simply average all the environmental measures
+    # and divide by a magic number
+    return X[:, :, 4:].mean(dim=2) / 26  # This is (B, T)
+
+def create_submission(model, transform, device):
     step_days = 10
     batch_size = 1024
     # We make chunks of num_days consecutive samples; As our dummy predictor
@@ -33,9 +41,6 @@ def create_submission(model, transform):
     num_days = 365
     num_workers = 7
 
-    use_cuda = torch.cuda.is_available()
-    device = torch.device("cuda") if use_cuda else torch.device("cpu")
-
     # Build the dataloaders
     logging.info("Building the dataloader")
 
@@ -63,11 +68,10 @@ def create_submission(model, transform):
         # days of the same location then followed by consecutive days of the
         # next location and so on
         chunk_size = batch_size * num_days
-
         with torch.no_grad():
             for X in tqdm.tqdm(test_loader):
-                X.to(device)
-
+                X = X.to(device)
+                print(X.shape)
                 #############################################
                 # This is where you inject your knowledge
                 # About your model
@@ -137,4 +141,34 @@ def create_submission(model, transform):
 
 if __name__ == "__main__":
     logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
-    test()
\ No newline at end of file
+    use_cuda = torch.cuda.is_available()
+    if use_cuda:
+        device = torch.device('cuda')
+    else:
+        device = torch.device('cpu')
+
+    model_path = "logs/LinearRegression_5/best_model.pt"
+
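+    # This architecture must mirror the one trained in main.py, otherwise load_state_dict will reject the checkpoint.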
+    model = nn.Sequential(
+        nn.Linear(14,8,False),
+        nn.ReLU(),
+        nn.Linear(8, 35, True),
+        nn.ReLU(),
+        nn.Linear(35,35,True),
+        nn.ReLU(),
+        nn.Linear(35,35,True),
+        nn.ReLU(),
+        nn.Linear(35,35,True),
+        nn.ReLU(),
+        nn.Linear(35,35,True),
+        nn.ReLU(),
+        nn.Linear(35,1, True),
+        nn.ReLU()
+    )
+
+    model = model.to(device)
+
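+    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine as well.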
+    model.load_state_dict(torch.load(model_path, map_location=device))
+
+    #create_submission(model, dataloader.transform_remove_space_time(), device)
+    create_submission(model, None, device)
\ No newline at end of file
diff --git a/dataloader.py b/dataloader.py
index 6f9f4f6..0f131ea 100644
--- a/dataloader.py
+++ b/dataloader.py
@@ -200,7 +200,7 @@ if __name__ == "__main__":
         num_days = num_days,
         batch_size = batch_size,
         num_workers = num_workers,
-        pin_memory = False,
+        pin_memory = True,
         valid_ratio = valid_ratio,
         overwrite_index=True,
         max_num_samples=max_num_samples,
@@ -241,6 +241,8 @@ if __name__ == "__main__":
         print(check_max(tensor, index))
         print("="*30)
 
+    print(X.shape)
+
     check_info(X,0) #latitude
     check_info(X,1) #longitude
     check_info(X,2) #depth
diff --git a/main.py b/main.py
index a29d8dc..a759e7e 100644
--- a/main.py
+++ b/main.py
@@ -1,18 +1,23 @@
+# Internal imports
 import dataloader
 import model
 import test
 from train import train
-import yaml
 import losses
 import optimizers
+import create_submission
+import utils
+
+# External imports
+import yaml
 import torch
 import logging
 import torch.optim
 import torch.nn as nn
-import create_submission
+import os
 
-def optimizer(cfg, model):
-    result = {"Adam" : torch.optim.Adam(model.parameters())}
+def optimizer(cfg, network):
+    result = {"Adam" : torch.optim.Adam(network.parameters())}
     return result[cfg["Optimizer"]]
 
 if __name__ == "__main__":
@@ -47,39 +52,57 @@ if __name__ == "__main__":
     else :
         device = toch.device('cpu')
 
-    #model = model.build_model(cfg, 18)
+    # network = model.build_model(cfg, 18)
 
-    model = nn.Sequential(
+    network = nn.Sequential(
         nn.Linear(14,8,False),
         nn.ReLU(),
-        nn.Linear(8, 8, True),
+        nn.Linear(8, 35, True),
+        nn.ReLU(),
+        nn.Linear(35,35,True),
+        nn.ReLU(),
+        nn.Linear(35,35,True),
+        nn.ReLU(),
+        nn.Linear(35,35,True),
         nn.ReLU(),
-        nn.Linear(8,35,True),
+        nn.Linear(35,35,True),
         nn.ReLU(),
-        nn.Linear(35,1, True)
+        nn.Linear(35,1, True),
+        nn.ReLU()
     )
-    model = model.to(device)
 
-    for param in list(model.parameters()):
+    def init_xavier(module):
+        if isinstance(module, nn.Linear):
+            nn.init.xavier_uniform_(module.weight)
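+    # Note: the initialization only takes effect once applied, e.g. via network.apply(init_xavier).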
+    network = network.to(device)
+
+    """
+    for param in list(network.parameters()):
         param = 1
+    """
 
     f_loss = losses.RMSLELoss()
 
-    optimizer = optimizer(cfg, model)
-    #optimizer = torch.optim.Adam((model.parameters()), lr = 10000)
-  
+    optimizer = optimizer(cfg, network)
+
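+    # Each run gets its own numbered directory under LogDir; the checkpoint keeps the weights with the lowest validation loss.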
+    logdir = utils.create_unique_logpath(cfg["LogDir"], cfg["Model"]["Name"])
+    network_checkpoint = model.ModelCheckpoint(logdir + "/best_model.pt", network)
 
     for t in range(cfg["Training"]["Epochs"]):
         torch.autograd.set_detect_anomaly(True)
         print("Epoch {}".format(t))
-        train(model, train_loader, f_loss, optimizer, device)
+        train(network, train_loader, f_loss, optimizer, device)
 
 
-        #print(list(model.parameters())[0].grad)
-        val_loss = test.test(model, valid_loader, f_loss, device)
+        #print(list(network.parameters())[0].grad)
+        val_loss = test.test(network, valid_loader, f_loss, device)
+
+        network_checkpoint.update(val_loss)
+
         print(" Validation : Loss : {:.4f}".format(val_loss))
 
-    create_submission.create_submission(model, None)
+
+    create_submission.create_submission(network, None, device)
     """
     logdir = generate_unique_logpath(top_logdir, "linear")
     print("Logging to {}".format(logdir))
diff --git a/model.py b/model.py
index 4e6830c..a40839c 100644
--- a/model.py
+++ b/model.py
@@ -20,6 +20,17 @@ class LinearRegression(nn.Module):
 def build_model(cfg, input_size):    
     return eval(f"{cfg['Model']['Name']}(cfg, input_size)")
 
+class ModelCheckpoint:
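+    """Save the model's weights whenever the validation loss improves on the best value seen so far."""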
+    def __init__(self, filepath, model):
+        self.min_loss = None
+        self.filepath = filepath
+        self.model = model
+
+    def update(self, loss):
+        if (self.min_loss is None) or (loss < self.min_loss):
+            print("Saving a better model")
+            torch.save(self.model.state_dict(), self.filepath)
+            self.min_loss = loss
 
 if __name__== "__main__":
     import yaml
diff --git a/submission.csv b/submission.csv
new file mode 100644
index 0000000..b1f81b8
--- /dev/null
+++ b/submission.csv
@@ -0,0 +1 @@
+Id,Predicted
diff --git a/utils.py b/utils.py
index b6b3e52..0c585e7 100644
--- a/utils.py
+++ b/utils.py
@@ -9,3 +9,9 @@ def generate_unique_logpath(logdir, raw_run_name):
             return log_path
         i = i + 1
 
+def create_unique_logpath(top_logdir, raw_run_name):
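+    """Create the top-level log directory if needed, then create and return a fresh uniquely named run directory inside it."""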
+    if not os.path.exists(top_logdir):
+        os.mkdir(top_logdir)
+    logdir = generate_unique_logpath(top_logdir, raw_run_name)
+    os.mkdir(logdir)
+    return logdir
\ No newline at end of file
-- 
GitLab