{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "hAyWzMHRd_bW"
   },
   "source": [
    "# Environment setup\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Install Python libraries"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We first pip-install the Python libraries listed in ```requirements.txt```."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "outputs": [],
   "source": [
    "!pip install -r requirements.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We will download the [SONYC-UST dataset](https://zenodo.org/record/2590742#.XIkTPBNKjuM). The dataset was released along with the following paper (it's probably a good idea to have a look at it):\n",
    "\n",
 Cartwright, M.">
    "> Cartwright, M., Mendez, A.E.M., Cramer, J., Lostanlen, V., Dove, G., Wu, H., Salamon, J., Nov, O., Bello, J.P. SONYC Urban Sound Tagging (SONYC-UST): A Multilabel Dataset from an Urban Acoustic Sensor Network. In Proceedings of the Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE), 2019. [PDF](https://dcase.community/documents/workshop2019/proceedings/DCASE2019Workshop_Cartwright_4.pdf)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "root_path = \"/content/drive/My Drive/Enseignement/2023-2024/NumVi/IA-DL/UST_project\"\n",
    "ust_data_path = os.path.join(root_path, \"data/ust-data/sonyc-ust\")\n",
    "os.makedirs(ust_data_path, exist_ok=True) # create a folder to store the data\n",
    "os.makedirs(os.path.join(ust_data_path, 'audio-dev'), exist_ok=True) # create a folder to store the development data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2024-02-21 13:37:19--  https://zenodo.org/record/3338310/files/annotations.csv\n",
      "Resolving zenodo.org (zenodo.org)... 188.184.103.159, 188.185.79.172, 188.184.98.238, ...\n",
      "Connecting to zenodo.org (zenodo.org)|188.184.103.159|:443... connected.\n",
      "HTTP request sent, awaiting response... 301 MOVED PERMANENTLY\n",
      "Location: /records/3338310/files/annotations.csv [following]\n",
      "--2024-02-21 13:37:20--  https://zenodo.org/records/3338310/files/annotations.csv\n",
      "Reusing existing connection to zenodo.org:443.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 3163972 (3.0M) [text/plain]\n",
      "Saving to: ‘annotations.csv’\n",
      "\n",
      "annotations.csv     100%[===================>]   3.02M  4.17MB/s    in 0.7s    \n",
      "\n",
      "2024-02-21 13:37:21 (4.17 MB/s) - ‘annotations.csv’ saved [3163972/3163972]\n",
      "\n",
      "--2024-02-21 13:37:21--  https://zenodo.org/record/3338310/files/audio-dev.tar.gz\n",
      "Resolving zenodo.org (zenodo.org)... 188.184.103.159, 188.185.79.172, 188.184.98.238, ...\n",
      "Connecting to zenodo.org (zenodo.org)|188.184.103.159|:443... connected.\n",
      "HTTP request sent, awaiting response... 301 MOVED PERMANENTLY\n",
      "Location: /records/3338310/files/audio-dev.tar.gz [following]\n",
      "--2024-02-21 13:37:21--  https://zenodo.org/records/3338310/files/audio-dev.tar.gz\n",
      "Reusing existing connection to zenodo.org:443.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 1892542516 (1.8G) [application/octet-stream]\n",
      "Saving to: ‘audio-dev.tar.gz’\n",
      "\n",
      "audio-dev.tar.gz    100%[===================>]   1.76G  4.84MB/s    in 6m 37s  \n",
      "\n",
      "2024-02-21 13:43:59 (4.54 MB/s) - ‘audio-dev.tar.gz’ saved [1892542516/1892542516]\n",
      "\n",
      "--2024-02-21 13:43:59--  https://zenodo.org/record/3338310/files/audio-eval.tar.gz\n",
      "Resolving zenodo.org (zenodo.org)... 188.184.103.159, 188.184.98.238, 188.185.79.172, ...\n",
      "Connecting to zenodo.org (zenodo.org)|188.184.103.159|:443... connected.\n",
      "HTTP request sent, awaiting response... 301 MOVED PERMANENTLY\n",
      "Location: /records/3338310/files/audio-eval.tar.gz [following]\n",
      "--2024-02-21 13:44:00--  https://zenodo.org/records/3338310/files/audio-eval.tar.gz\n",
      "Reusing existing connection to zenodo.org:443.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 180759660 (172M) [application/octet-stream]\n",
      "Saving to: ‘audio-eval.tar.gz’\n",
      "\n",
      "audio-eval.tar.gz   100%[===================>] 172.39M  20.4MB/s    in 8.7s    \n",
      "\n",
      "2024-02-21 13:44:09 (19.7 MB/s) - ‘audio-eval.tar.gz’ saved [180759660/180759660]\n",
      "\n",
      "--2024-02-21 13:44:09--  https://zenodo.org/record/3338310/files/dcase-ust-taxonomy.yaml\n",
      "Resolving zenodo.org (zenodo.org)... 188.184.103.159, 188.184.98.238, 188.185.79.172, ...\n",
      "Connecting to zenodo.org (zenodo.org)|188.184.103.159|:443... connected.\n",
      "HTTP request sent, awaiting response... 301 MOVED PERMANENTLY\n",
      "Location: /records/3338310/files/dcase-ust-taxonomy.yaml [following]\n",
      "--2024-02-21 13:44:09--  https://zenodo.org/records/3338310/files/dcase-ust-taxonomy.yaml\n",
      "Reusing existing connection to zenodo.org:443.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 1081 (1.1K) [application/octet-stream]\n",
      "Saving to: ‘dcase-ust-taxonomy.yaml’\n",
      "\n",
      "dcase-ust-taxonomy. 100%[===================>]   1.06K  5.24KB/s    in 0.2s    \n",
      "\n",
      "2024-02-21 13:44:10 (5.24 KB/s) - ‘dcase-ust-taxonomy.yaml’ saved [1081/1081]\n",
      "\n",
      "--2024-02-21 13:44:10--  https://zenodo.org/record/3338310/files/README.md\n",
      "Resolving zenodo.org (zenodo.org)... 188.184.103.159, 188.184.98.238, 188.185.79.172, ...\n",
      "Connecting to zenodo.org (zenodo.org)|188.184.103.159|:443... connected.\n",
      "HTTP request sent, awaiting response... 301 MOVED PERMANENTLY\n",
      "Location: /records/3338310/files/README.md [following]\n",
      "--2024-02-21 13:44:11--  https://zenodo.org/records/3338310/files/README.md\n",
      "Reusing existing connection to zenodo.org:443.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 8716 (8.5K) [application/octet-stream]\n",
      "Saving to: ‘README.md’\n",
      "\n",
      "README.md           100%[===================>]   8.51K  --.-KB/s    in 0.004s  \n",
      "\n",
      "2024-02-21 13:44:11 (2.26 MB/s) - ‘README.md’ saved [8716/8716]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "os.chdir(ust_data_path)\n",
    "!wget https://zenodo.org/record/3338310/files/annotations.csv\n",
    "!wget https://zenodo.org/record/3338310/files/audio-dev.tar.gz\n",
    "!wget https://zenodo.org/record/3338310/files/audio-eval.tar.gz\n",
    "!wget https://zenodo.org/record/3338310/files/dcase-ust-taxonomy.yaml\n",
    "!wget https://zenodo.org/record/3338310/files/README.md\n",
    "os.chdir(\"audio-dev\")\n",
    "!tar xf ../audio-dev.tar.gz\n",
    "os.chdir(\"..\")\n",
    "!rm audio-dev.tar.gz\n",
    "!tar xf audio-eval.tar.gz\n",
    "!rm audio-eval.tar.gz\n",
    "os.chdir(root_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You should end-up with the following file structure:"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "```\n",
    "data\n",
    "+-- ust-data\n",
    "|   +-- sonyc-ust\n",
    "|   |   +-- audio-dev\n",
    "|   |   |   +-- train\n",
    "|   |   |   |   +-- 01_000006.wav\n",
    "|   |   |   |   +-- ...\n",
    "|   |   |   +-- validate\n",
    "|   |   |   |   +-- 00_000066.wav\n",
    "|   |   |   |   +-- ...\n",
    "|   |   +-- audio-eval\n",
    "|   |   |   +-- 00_010346.wav\n",
    "|   |   |   +-- ...\n",
    "|   |   +-- annotations.csv\n",
    "|   |   +-- dcase-ust-taxonomy.yaml\n",
    "|   |   +-- README.md\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}