diff --git a/bindataset.py b/bindataset.py
index 31b055e220bb8970b77697830fd295a5397f15a7..8b4126f24a964bc6137e51b02432d0c16c14a7ac 100644
--- a/bindataset.py
+++ b/bindataset.py
@@ -565,7 +565,16 @@ def test_getitem():
     logging.info(
         f"The idx {idx} corresponds to : \n\tlinear index={lin_index}\n\tfile offset={file_offset}\n\ttab indices={tab_indices}"
     )
-
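+    # Also build the dataloaders so that indexing is exercised through the
+    # DataLoader machinery; trainpath, num_days, batch_size, num_workers,
+    # use_cuda, valid_ratio and max_num_samples are assumed to be defined
+    # earlier in this test, as in test_dataloader below.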
+    train_loader, valid_loader = get_dataloaders(
+        trainpath,
+        num_days,
+        batch_size,
+        num_workers,
+        use_cuda,
+        valid_ratio,
+        overwrite_index=True,
+        max_num_samples=max_num_samples,
+    )
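+    # Pull one minibatch through the training loader as a sanity check
+    X, Y = next(iter(train_loader))
+    logging.info(f"Got a minibatch of size {X.shape} -> {Y.shape}")
+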
 
 def test_dataloader():
     logging.info("====> Test dataloader")
diff --git a/config.yml b/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fc17b56963a1cfcc95a77f86b7c7f7ec93f39973
--- /dev/null
+++ b/config.yml
@@ -0,0 +1,26 @@
+# Dataset Configuration
+Dataset:
+  num_days: 1  # Test with sequence of 1 day
+  batch_size: 128
+  num_workers: 7
+  valid_ratio: 0.2
+  max_num_samples: null  # null means no cap; set e.g. 1000 to truncate the dataset
+  _DEFAULT_TRAIN_FILEPATH: "/mounts/Datasets3/2022-ChallengePlankton/sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin"
+  _DEFAULT_TEST_FILEPATH: "/mounts/Datasets3/2022-ChallengePlankton/sub_2CMEMS-MEDSEA-2017-testing.nc.bin"
+  _ENCODING_LINEAR: "I"
+  _ENCODING_INDEX: "I"  # "h" (short, 2 bytes) would also be sufficient
+  _ENCODING_OFFSET_FORMAT: ""
+  _ENCODING_ENDIAN: "<"
+
+# Model selection
+Model:
+  Name: LinearRegression
+
+# Model parameters selection
+LinearRegression:
+  # Bias in {True, False}
+  Bias: True
diff --git a/dataloader.py b/dataloader.py
index 6cd159e44c0e6eb68dae0c91740dae3e2ecc2ee2..355a91d18a2db9f349572d54fa464698b52cf7d7 100644
--- a/dataloader.py
+++ b/dataloader.py
@@ -180,3 +180,30 @@ def get_test_dataloader(
     )
     return test_loader
 
+
+if __name__ == "__main__":
+    logging.basicConfig(filename='logs/dataloader_unit_test.log', level=logging.INFO)
+    logging.info("====> Test dataloader")
+    use_cuda = torch.cuda.is_available()
+    trainpath = _DEFAULT_TRAIN_FILEPATH
+    num_days = 1  # Test with sequence of 1 day
+    batch_size = 128
+    num_workers = 7
+    valid_ratio = 0.2
+    # max_num_samples = 1000
+    max_num_samples = None
+
+    train_loader, valid_loader = get_dataloaders(
+        trainpath,
+        num_days,
+        batch_size,
+        num_workers,
+        use_cuda,
+        valid_ratio,
+        overwrite_index=True,
+        max_num_samples=max_num_samples,
+    )
+
+    it = iter(train_loader)
+    X, Y = next(it)
+    logging.info(f"Got a minibatch of size {X.shape} -> {Y.shape}")
\ No newline at end of file
diff --git a/debug.py b/debug.py
index 57961a826add76ca582946c5d7ae7ebde9a7e5fb..0ecb5f4a954ac10770479ea6d83e402229bacf66 100644
--- a/debug.py
+++ b/debug.py
@@ -1,5 +1,5 @@
 from dataset import Dataset
-
+import bindataset
 
 _DEFAULT_TRAIN_FILEPATH = "/mounts/Datasets3/2022-ChallengePlankton/sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin"
 _DEFAULT_TEST_FILEPATH = (
@@ -8,10 +8,32 @@ _DEFAULT_TEST_FILEPATH = (
 
 idx ="sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx"
 
-data = Dataset(_DEFAULT_TRAIN_FILEPATH, False, True, idx, 1, None, None)
+# Dataset builds a point dataset, generating the index if necessary or
+# requested. Its arguments are:
+#     filepath: the full path to the nc file to load
+#     overwrite_index: if True, ignores the index and regenerates it
+#     train: if True, accessing an element also returns the phyc
+#     subset_file: a filename which holds a list of indices this dataset must use
+#     num_days: the number of days that each sample considers
+#     transform: a transform to apply to the input tensor
+#     target_transform: a transform to apply to the phyc output tensor
+data = Dataset(
+    _DEFAULT_TRAIN_FILEPATH,
+    overwrite_index=False,
+    train=False,
+    subset_file=idx,
+    num_days=20,
+    transform=None,
+    target_transform=None,
+)
+
+
+print("Len whole dataset :") 
+print(len(data))
+print()
+
+print("Shape data[0] : ")
+print(data[0].shape)
 
-print(len(data[0]))
+print(data.in_variables)
+print(len(data.in_variables))
 
-print(data[0][0].shape)
 
-print(data[0][1])
\ No newline at end of file
+bindataset.test_time_dataset()
\ No newline at end of file
diff --git a/logs/dataloader_unit_test.log b/logs/dataloader_unit_test.log
new file mode 100644
index 0000000000000000000000000000000000000000..a16be9d991b853dafd079359f936ca4db0ccb9ca
--- /dev/null
+++ b/logs/dataloader_unit_test.log
@@ -0,0 +1,53 @@
+INFO:root:====> Test dataloader
+INFO:root:= Dataloaders
+INFO:root:  - Dataset creation
+INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points
+INFO:root:Generating the index
+INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx
+INFO:root:  - Loaded a dataset with 50154984 samples
+INFO:root:  - Splitting the data in training and validation sets
+INFO:root:Generating the subset files from 50154984 samples
+INFO:root:  - Subset dataset
+INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points
+INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx
+INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points
+INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx
+INFO:root:  - The train fold has 40123681 samples
+INFO:root:  - The valid fold has 10031303 samples
+INFO:root:Got a minibatch of size torch.Size([128, 1, 18]) -> torch.Size([128, 1])
+INFO:root:====> Test dataloader
+INFO:root:= Dataloaders
+INFO:root:  - Dataset creation
+INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points
+INFO:root:Generating the index
+INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx
+INFO:root:  - Loaded a dataset with 50154984 samples
+INFO:root:  - Splitting the data in training and validation sets
+INFO:root:Generating the subset files from 50154984 samples
+INFO:root:  - Subset dataset
+INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points
+INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx
+INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points
+INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx
+INFO:root:  - The train fold has 40125966 samples
+INFO:root:  - The valid fold has 10029018 samples
+INFO:root:Got a minibatch of size torch.Size([128, 1, 18]) -> torch.Size([128, 1])
+INFO:root:====> Test dataloader
+INFO:root:= Dataloaders
+INFO:root:  - Dataset creation
+INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points
+INFO:root:Generating the index
+INFO:root:====> Test dataloader
+INFO:root:= Dataloaders
+INFO:root:  - Dataset creation
+INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points
+INFO:root:Generating the index
+INFO:root:====> Test dataloader
+INFO:root:= Dataloaders
+INFO:root:  - Dataset creation
+INFO:root:The loaded dataset contains 25 latitudes, 37 longitudes, 28 depths and 2222 time points
+INFO:root:Generating the index
+INFO:root:Loading the index from sub_2CMEMS-MEDSEA-2010-2016-training.nc.bin_index.idx
+INFO:root:  - Loaded a dataset with 50154984 samples
+INFO:root:  - Splitting the data in training and validation sets
+INFO:root:Generating the subset files from 50154984 samples
diff --git a/main.py b/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6c2bf609a4c49a3036d8a88b8f87fb4b4440031
--- /dev/null
+++ b/main.py
@@ -0,0 +1,35 @@
+import os
+
+import torch
+import yaml
+
+import dataloader
+import model
+import test
+import train
+from utils import generate_unique_logpath
+
+if __name__ == "__main__":
+    with open("config.yml", "r") as config_file:
+        cfg = yaml.safe_load(config_file)
+
+    use_cuda = torch.cuda.is_available()
+    trainpath           = cfg["Dataset"]["_DEFAULT_TRAIN_FILEPATH"]
+    num_days            = cfg["Dataset"]["num_days"]
+    batch_size          = cfg["Dataset"]["batch_size"]
+    num_workers         = cfg["Dataset"]["num_workers"]
+    valid_ratio         = cfg["Dataset"]["valid_ratio"]
+    max_num_samples     = cfg["Dataset"]["max_num_samples"]
+
+    train_loader, valid_loader = dataloader.get_dataloaders(
+        trainpath,
+        num_days,
+        batch_size,
+        num_workers,
+        use_cuda,
+        valid_ratio,
+        overwrite_index=True,
+        max_num_samples=max_num_samples,
+    )
+
+    # Infer the number of input variables from one minibatch, whose shape
+    # is (batch_size, num_days, num_features)
+    X, _ = next(iter(train_loader))
+    input_size = X.shape[-1]
+
+    # The instance is named net so it does not shadow the model module
+    net = model.build_model(cfg, input_size)
+
+    top_logdir = "./logs"
+    logdir = generate_unique_logpath(top_logdir, "linear")
+    print("Logging to {}".format(logdir))
+    # -> Prints out     Logging to   ./logs/linear_1
+    if not os.path.exists(logdir):
+        os.mkdir(logdir)
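+
+    # A minimal training-loop sketch wiring in the train/test helpers added
+    # in this change; MSELoss, Adam and the epoch count are illustrative
+    # assumptions and are not (yet) driven by config.yml.
+    device = torch.device("cuda" if use_cuda else "cpu")
+    net = net.to(device)
+    f_loss = torch.nn.MSELoss()
+    optimizer = torch.optim.Adam(net.parameters())
+    num_epochs = 10
+    for epoch in range(num_epochs):
+        train.train(net, train_loader, f_loss, optimizer, device)
+        val_loss, _ = test.test(net, valid_loader, f_loss, device)
+        print(f"Epoch {epoch}: validation loss {val_loss:.5f}")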
diff --git a/model.py b/model.py
index a376ef84c27aba6bda2179b717c1393631d6285d..7a393b4e8c3503c75ee50f3f998ef4ed1be3b4c6 100644
--- a/model.py
+++ b/model.py
@@ -3,4 +3,14 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.autograd import Function
+from models import linear
 
+
+def build_model(cfg, input_size):
+    # Resolve the model class by name from models/linear.py; getattr avoids
+    # eval'ing an arbitrary string coming from the config file
+    return getattr(linear, cfg["Model"]["Name"])(cfg, input_size)
+
+
+if __name__ == "__main__":
+    import yaml
+
+    with open("config.yml", "r") as config_file:
+        cfg = yaml.safe_load(config_file)
+    print(cfg["Model"]["Name"])
\ No newline at end of file
diff --git a/models/linear.py b/models/linear.py
new file mode 100644
index 0000000000000000000000000000000000000000..0627fad3d6efbc74cf192cc1e6389a0a9fd5b8dc
--- /dev/null
+++ b/models/linear.py
@@ -0,0 +1,11 @@
+import torch.nn as nn
+
+class LinearRegression(nn.Module):
+    def __init__(self, cfg, input_size):
+        super(LinearRegression, self).__init__()
+        self.input_size = input_size
+        self.bias = cfg["LinearRegression"]["Bias"]
+        self.regressor = nn.Linear(input_size, 1, bias=self.bias)
+
+    def forward(self, x):
+        y = self.regressor(x)
+        return y
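+
+
+if __name__ == "__main__":
+    # Smoke test with a hand-written config; the 18 input variables match
+    # the minibatch shapes reported in logs/dataloader_unit_test.log
+    import torch
+
+    cfg = {"LinearRegression": {"Bias": True}}
+    net = LinearRegression(cfg, 18)
+    x = torch.randn(128, 1, 18)
+    print(net(x).shape)  # -> torch.Size([128, 1, 1])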
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..986dc9f9b72feee2c5129c1870bba62b0d7873bf
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+torch
+pyyaml
\ No newline at end of file
diff --git a/test.py b/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..80d9e454853e34adead85e949683cb3c92cc056c
--- /dev/null
+++ b/test.py
@@ -0,0 +1,52 @@
+import torch
+
+def test(model, loader, f_loss, device):
+    """
+    Test a model by iterating over the loader
+
+    Arguments :
+
+        model     -- A torch.nn.Module object
+        loader    -- A torch.utils.data.DataLoader
+        f_loss    -- The loss function, i.e. a loss Module
+        device    -- The device to use for computation 
+
+    Returns :
+
+        A tuple with the mean loss and mean accuracy
+
+    """
+    # We disable gradient computation which speeds up the computation
+    # and reduces the memory usage
+    with torch.no_grad():
+        model.eval()
+        N = 0
+        tot_loss, correct = 0.0, 0.0
+        for _, (inputs, targets) in enumerate(loader):
+
+            # We get a minibatch of inputs and targets from the loader.
+            # With a minibatch size of 128 and num_days days per sample,
+            # the shapes are:
+            #    inputs  is of shape (128, num_days, 18)
+            #    targets is of shape (128, 1)
+
+            # We need to copy the data on the GPU if we use one
+            inputs, targets = inputs.to(device), targets.to(device)
+
+            # Compute the forward pass, i.e. the prediction for each input sample
+            outputs = model(inputs)
+
+            # We accumulate the exact number of processed samples
+            N += inputs.shape[0]
+
+            # We accumulate the loss; the multiplication by inputs.shape[0]
+            # is needed because the loss criterion averages over its samples
+            tot_loss += inputs.shape[0] * f_loss(outputs, targets).item()
+
+            # For the accuracy, we compute the label of each input sample.
+            # Be careful: the model outputs scores, not probabilities, but
+            # since the softmax does not alter the ranking of its inputs we
+            # can obtain the label by argmaxing the scores directly.
+            # Note: this is only meaningful for a classification head; for
+            # the regression models in this project the second returned
+            # value should be replaced by a proper regression metric.
+            predicted_targets = outputs.argmax(dim=1)
+            correct += (predicted_targets == targets).sum().item()
+        return tot_loss / N, correct / N
\ No newline at end of file
diff --git a/train.py b/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce6f54c9315a80af78a1d46ba765235fb81fc4f6
--- /dev/null
+++ b/train.py
@@ -0,0 +1,31 @@
+def train(model, loader, f_loss, optimizer, device):
+    """
+    Train a model for one epoch, iterating over the loader
+    using the f_loss to compute the loss and the optimizer
+    to update the parameters of the model.
+
+    Arguments :
+
+        model     -- A torch.nn.Module object
+        loader    -- A torch.utils.data.DataLoader
+        f_loss    -- The loss function, i.e. a loss Module
+        optimizer -- A torch.optim.Optimizer object
+        device    -- a torch.device class specifying the device
+                     used for computation
+
+    Returns :
+
+        Nothing; the model parameters are updated in place
+    """
+
+    model.train()
+
+    for _, (inputs, targets) in enumerate(loader):
+        inputs, targets = inputs.to(device), targets.to(device)
+
+        # Compute the forward pass through the network up to the loss
+        outputs = model(inputs)
+        loss = f_loss(outputs, targets)
+
+        # Backward and optimize
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
\ No newline at end of file
diff --git a/train_indices.subset b/train_indices.subset
index 20e13c16236888ceacd42847424c1c598cbf0fc6..52f50d5e0619ed0d361ba53becfb35495ae718be 100644
Binary files a/train_indices.subset and b/train_indices.subset differ
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6b3e52f630f8b2af68e562cffc74fa31058cbfc
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,11 @@
+import os
+
+def generate_unique_logpath(logdir, raw_run_name):
+    i = 0
+    while True:
+        run_name = raw_run_name + "_" + str(i)
+        log_path = os.path.join(logdir, run_name)
+        if not os.path.isdir(log_path):
+            return log_path
+        i = i + 1
+
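+
+if __name__ == "__main__":
+    # Example: prints ./logs/linear_0 on a fresh checkout, ./logs/linear_1
+    # once that directory exists, and so on
+    print(generate_unique_logpath("./logs", "linear"))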
diff --git a/valid_indices.subset b/valid_indices.subset
index 457f1d117f15460c2c1cc0aa881eae34b069767f..8733e1ea294c6b7e278286602c31a9ec5689b66a 100644
Binary files a/valid_indices.subset and b/valid_indices.subset differ