# coding: utf-8
"""
This is a dummy test with a nonsense model in order to illustrate how one
could apply a model to produce predictions as expected by the submission.

The main algorithmic difficulties illustrated by this script are:
    - continuously iterating over the test dataset without shuffling will
      continuously iterate over the volume
      (latitude, longitude, depth, time)
    - when iterating over minibatches, to subsample every time series by
      steps of 10 days, we show how to identify where to sample the
      minibatches of predictions
"""

# Standard imports
import sys
import logging
import datetime

# External imports
import tqdm
import torch
import torch.nn as nn

# Local imports
import dataloader


def dummy_model(X):
    """Return a meaningless prediction for every time step of ``X``.

    ``X`` is indexed as a 3-dimensional tensor (the original author
    documents it as ``(B, T, N)``). The features from index 4 onwards are
    averaged along the last dimension and divided by an arbitrary constant,
    which yields a ``(B, T)`` tensor.
    """
    # As a dummy model, say, we average all the environmental measures
    # divided by a magic number
    return X[:, :, 4:].mean(dim=2) / 26  # This is (B, T)


def _subsample_indices(chunk_size, t_offset, num_days_test, step_days, num_predictions):
    """Locate the predictions of a linearized minibatch that must be kept.

    Arguments:
        chunk_size: nominal number of samples of a minibatch
            (``batch_size * num_days``).
        t_offset: number of samples of the time series in progress that were
            already consumed by the previous minibatches.
        num_days_test: length of one complete time series.
        step_days: sampling period along a time series.
        num_predictions: number of predictions actually available in the
            current minibatch (the last minibatch may be shorter).

    Returns:
        A tuple ``(indices, next_t_offset)`` where ``indices`` is the list of
        positions to keep in the current minibatch and ``next_t_offset`` is
        the offset to use for the following minibatch.
    """
    # Positions of the beginning of every time series, expressed in a frame
    # shifted by t_offset, i.e. a frame in which index 0 is the beginning of
    # the time series the current minibatch starts in.
    yearcut_indices = range(0, chunk_size + t_offset, num_days_test)

    # Negative indices belong to previous minibatches; indices beyond the
    # upper bound belong to the next ones (or do not exist at all for a
    # shorter, final minibatch).
    upper_bound = min(chunk_size, num_predictions)
    shifted_indices = (
        year_start + day - t_offset
        for year_start in yearcut_indices
        for day in range(0, num_days_test, step_days)
    )
    indices = [idx for idx in shifted_indices if 0 <= idx < upper_bound]

    # Number of samples between the start of the last time series and the
    # end of the minibatch: these are already consumed for the next round.
    next_t_offset = chunk_size - (yearcut_indices[-1] - t_offset)
    return indices, next_t_offset


def create_submission(model, transform, device):
    """Run ``model`` over the test set and write ``submission.csv``.

    The test dataloader is iterated without shuffling; the predictions of
    every time series are subsampled every ``step_days`` days and dumped as
    ``Id,Predicted`` rows, with consecutive ids starting at 0.

    Arguments:
        model: callable applied to every minibatch moved on ``device``; its
            output is flattened, so it must provide one prediction per
            (sample, time step) pair.
        transform: forwarded as ``transform`` to
            ``dataloader.get_test_dataloader``.
        device: torch device the minibatches are moved to.
    """
    step_days = 10
    batch_size = 1024
    # We make chunks of num_days consecutive samples; As our dummy predictor
    # is not using the temporal context, this is here arbitrarily chosen
    # However, note that it must be a divisor of the total number of days
    # in the 2017 year, either 1, 5, 73 or 365
    num_days = 365
    num_workers = 7

    use_cuda = torch.cuda.is_available()

    # Build the dataloaders
    logging.info("Building the dataloader")

    test_loader = dataloader.get_test_dataloader(
        dataloader._DEFAULT_TEST_FILEPATH,
        num_days,
        batch_size,
        num_workers,
        use_cuda,
        overwrite_index=True,
        transform=transform,
        target_transform=None,
    )
    # NOTE(review): presumably the number of days of the test period
    # (365 for 2017) - confirm against the dataset implementation.
    num_days_test = test_loader.dataset.ntimes

    logging.info("= Filling in the submission file")
    # The context manager closes the file; no explicit close() is needed.
    with open("submission.csv", "w") as fh_submission:
        fh_submission.write("Id,Predicted\n")
        submission_offset = 0

        # Iterate on the test dataloader
        t_offset = 0
        # Every minibatch will contain batch_size * num_days
        # As we do not shuffle the data these correspond to consecutive
        # days of the same location then followed by consecutive days of the
        # next location and so on
        chunk_size = batch_size * num_days

        with torch.no_grad():
            for X in tqdm.tqdm(test_loader):
                X = X.to(device)

                #############################################
                # This is where you inject your knowledge
                # About your model
                # The rest of the code is generic as soon as you have a
                # model working on time series
                # X is (B, T, N)
                # predictions are (B, T)
                predictions = model(X)

                #############################################

                # We linearize the predictions in (B * T); reshape also
                # copes with a non contiguous model output
                predictions = predictions.reshape(-1)

                # We need to slice the times by steps of step_days days
                # in chunks of num_days_test days (2017 had 365 days)
                subdays_indices, t_offset = _subsample_indices(
                    chunk_size,
                    t_offset,
                    num_days_test,
                    step_days,
                    predictions.shape[0],
                )

                predictions_list = predictions[subdays_indices].tolist()

                # Check
                # X = X.view(-1, 18)
                # subX = X[yearcut_indices, :]
                # # subX = X
                # timestamps = subX[:, 3].tolist()
                # print(
                #     "\n".join(
                #         [f"{datetime.datetime.fromtimestamp(x)}" for x in timestamps]
                #     )
                # )
                # print("\n\n")
                # sys.exit(-1)

                # A minibatch may not contain any sampled day (small
                # chunks); writing then would insert an empty row in the csv
                if not predictions_list:
                    continue

                # Dump the predictions to the submission file
                submission_part = "\n".join(
                    f"{i + submission_offset},{pred}"
                    for i, pred in enumerate(predictions_list)
                )
                fh_submission.write(submission_part + "\n")
                submission_offset += len(predictions_list)


if __name__ == "__main__":
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    model_path = "logs/LinearRegression_1/best_model.pt"

    # The architecture must match the one of the saved state dict
    model = nn.Sequential(
        nn.Linear(14, 35, True),
        nn.ReLU(),
        nn.Linear(35, 35, True),
        nn.ReLU(),
        nn.Linear(35, 35, True),
        nn.ReLU(),
        nn.Linear(35, 35, True),
        nn.ReLU(),
        nn.Linear(35, 35, True),
        nn.ReLU(),
        nn.Linear(35, 35, True),
        nn.ReLU(),
        nn.Linear(35, 35, True),
        nn.ReLU(),
        nn.Linear(35, 1, True),
        nn.ReLU(),
    )
    model = model.to(device)

    # map_location allows loading on cpu a checkpoint that was saved on gpu
    model.load_state_dict(torch.load(model_path, map_location=device))
    # Inference only: switch the modules to evaluation mode
    model.eval()

    create_submission(model, dataloader.transform_remove_space_time(), device)
    # create_submission(model, None, device)