#!/usr/bin/python import os import subprocess import argparse def makejob(commit_id, model, nruns, user, time_wall): return f"""#!/bin/bash #SBATCH --job-name={model} #SBATCH --nodes=1 #SBATCH --partition=gpu_prod_night #SBATCH --time={time_wall} #SBATCH --output=logslurms/slurm-%A_%a.out #SBATCH --error=logslurms/slurm-%A_%a.err #SBATCH --array=0-{nruns} current_dir=`pwd` echo "Session " {model}_${{SLURM_ARRAY_JOB_ID}}_${{SLURM_ARRAY_TASK_ID}} echo "Copying the source directory and data" date mkdir $TMPDIR/projet_dl rsync -r . $TMPDIR/projet_dl echo "Checking out the correct version of the code commit_id {commit_id}" cd $TMPDIR/pprojet_dl git checkout {commit_id} echo "Setting up the virtual environment" python3 -m pip install virtualenv --user virtualenv -p python3 venv source venv/bin/activate python -m pip install -r requirements.txt echo "Running main.py" python3 main.py --logDir /usr/users/sdi1/sdi1_3/Projet_DL/Kaggle_Phytoplankton/logs/ --no_wandb if [[ $? != 0 ]]; then exit -1 fi # Once the job is finished, you can copy back back # files from $TMPDIR/emnist to $current_dir """ def submit_job(job): with open('job.sbatch', 'w') as fp: fp.write(job) os.system("sbatch job.sbatch") # Ensure all the modified files have been staged and commited result = int(subprocess.run("expr $(git diff --name-only | wc -l) + $(git diff --name-only --cached | wc -l)", shell=True, stdout=subprocess.PIPE).stdout.decode()) if result > 0: print(f"We found {result} modifications either not staged or not commited") raise RuntimeError("You must stage and commit every modification before submission ") commit_id = subprocess.check_output("git log --pretty=format:'%H' -n 1", shell=True).decode() parser = argparse.ArgumentParser() parser.add_argument("--time_wall", default="no-limit", help="Time wall. Choose in [no-limit, hour, half, quarter]") parser.add_argument("--model_name", default ="Bi-LSTM", help="Name of the model to train") # Ensure the log directory exists os.system("mkdir -p logslurms") time_wall = {"no_limit": "48:00:00","hour" : "1:00:00", "half" : "0:00:00", "quarter" : "0:00:15"} # Launch the batch jobs submit_job(makejob(commit_id, "cnn", 1, args.user, time_wall[args.time_wall]))