Commit a5fd54b8 authored by Emmanuel Vazquez's avatar Emmanuel Vazquez
Browse files

Initial commit

parent b00b56ad
# Data
*.csv
*.sav
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
.venv/
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
Raphael Adda
Maxime Charpentier
Nathan Fouqueray
Emmanuel Vazquez
Copyright (c) 2020 CentraleSupelec
MIT Licence
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
(the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
#!/usr/bin/env python3
___authors___ = 'N. Fouqueray, E. Vazquez'
__copyright__ = "CentraleSupelec, 2020"
__license__ = "MIT"
__maintainer__ = "E. Vazquez"
__email__ = "emmanuel.vazquez@centralesupelec.fr"
__status__ = "alpha"
"""Import data
Sources
* data.gouv.fr / Santé Publique France
https://www.data.gouv.fr/fr/datasets/donnees-relatives-a-lepidemie-du-covid-19/#_
* ECDC
"""
import urllib.request
from datetime import datetime
from os import getcwd
from os.path import join
import numpy as np
import pandas as pd
class Data():
"Stores url and filenames"
def __init__(self, url, filename):
self.url = url
self.filename = filename
def download_data(d, debug=False):
# Download data
import ssl
# This circumvents the SSL certificate problem
ssl._create_default_https_context = ssl._create_unverified_context
if debug: print('Saving %s at %s' % (d.filename, d.url))
urllib.request.urlretrieve(d.url, d.filename)
def get_data(filename, separator):
# Extract data
dataframe = pd.read_csv(filename, sep=separator, error_bad_lines=False)
return dataframe
def get_fatalities(country='France', reuse=False, debug=False):
# Initialization
if debug: print('Import data...')
root = getcwd()
datapath = join(root, 'data')
# Source defintions
download_JHU = False
datasources = []
url_data_01 = 'https://opendata.ecdc.europa.eu/covid19/casedistribution/csv'
filename_01 = join(datapath, 'data_ECDC_deaths.csv')
datasources.append(Data(url_data_01, filename_01))
if country == 'France':
url_data_02 = 'http://www.data.gouv.fr/fr/datasets/r/63352e38-d353-4b54-bfd1-f1b3ee1cabd7'
# ancienne adresse 'https://www.data.gouv.fr/fr/datasets/r/b94ba7af-c0d6-4055-a883-61160e412115'
filename_02 = join(datapath, 'data_SPF_A.csv')
datasources.append(Data(url_data_02, filename_02))
if download_JHU:
url_data_03 = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
filename_03 = join(datapath, 'data_JHU_deaths.csv')
datasources.append(Data(url_data_03, filename_03))
if debug: print('Download data...')
if not reuse:
for data in datasources:
download_data(data, debug)
# --- Data ECDC.
file = datasources[0].filename
if debug: print('Reading %s...' % file)
df_ECDC = get_data(file, ',')
if debug: print('Number of lines: %d' % df_ECDC.shape[0])
df_ECDC_country = df_ECDC.loc[df_ECDC.loc[:]['countriesAndTerritories'] == country]
df_ECDC_country = df_ECDC_country.copy()
df_ECDC_country = df_ECDC_country.drop(
columns=['day', 'month', 'year', 'countryterritoryCode', 'geoId', 'countryterritoryCode', 'popData2018'])
n = df_ECDC_country.shape[0]
if debug: print(df_ECDC_country.head(10))
# --- Data Santé Publique France
if country == 'France':
file = datasources[1].filename
if debug: print('Reading %s...' % file)
df_SPF = get_data(file, ';')
if debug:
print('Number of lines: %d' % df_SPF.shape[0])
print('\n---')
print(df_SPF.head(10))
print('...\n---\n')
departements = df_SPF.dep.unique()
dates = df_SPF.jour.unique()
# --- Data JH Univ.
if download_JHU:
# Pour le dataframe portant sur le nombre de décès
file = datasources[2].filename
if debug: print('Reading %s...' % file)
df_JHU = get_data(file, ',')
if debug: print('Number of lines: %d' % df_JHU.shape[0])
# Extraction des données France
df_JHU_country = df_JHU.loc[df_JHU.loc[:]['Country/Region'] == 'France']
df_JHU_country_Metrop = df_JHU_country.loc[pd.isna(df_JHU_country.loc[:]['Province/State'])]
df_JHU_country_Metrop = df_JHU_country_Metrop.copy()
df_JHU_country_Metrop.drop(columns=['Province/State', 'Country/Region', 'Lat', 'Long'], inplace=True)
df_JHU = df_JHU_country_Metrop.transpose()
df_JHU.columns = ['Fatalities']
if debug: print(df_JHU.head(10))
n = df_JHU.shape[0]
# --- Fusion données
# construction d'un df "décès" avec les dates en indices et les
# départements en colonnes + agrégation de tous les départements
# extraction des dates
dates_ECDC = df_ECDC_country['dateRep']
dates = [datetime.strptime(date, '%d/%m/%Y') for date in dates_ECDC]
df_dates = pd.DataFrame(dates)
df_dates.columns = ['date']
# extract deaths in ECDC data
if country == 'France':
n_addtional_columns = 96
else:
n_addtional_columns = 0
fatalities = np.empty([n, 1 + n_addtional_columns])
fatalities[:] = np.NaN
fatalities[:, 0] = df_ECDC_country.deaths
df_fatalities = pd.DataFrame(fatalities)
# SPF
if country == 'France':
dpt_list = pd.unique(df_SPF['dep']).tolist()
dpt_list = [dpt_list[i] for i in range(n_addtional_columns)]
else:
dpt_list = []
dpt_list = ['total'] + dpt_list
df_fatalities.columns = dpt_list
df_fatalities = pd.concat([df_dates, df_fatalities], axis=1)
if country == 'France':
# insertion des décès par département
for i in range(df_SPF.shape[0]):
sexe = df_SPF.sexe[i]
if sexe == 0:
date = datetime.strptime(df_SPF.jour[i], '%Y-%m-%d')
dep = df_SPF.dep[i]
dc = df_SPF.dc[i]
td = df_fatalities[df_fatalities.date == date]
if not td.empty and dep in dpt_list:
j = td.index[0]
df_fatalities.loc[j, dep] = int(dc)
# remove all stats before 1/1/2020
idx = (df_fatalities.date >= pd.Timestamp(2020, 1, 1))
df_fatalities = df_fatalities[idx]
# ECDC data are stored by decreasing dates: reverse
df_fatalities = df_fatalities.iloc[::-1]
df_fatalities.index = range(df_fatalities.shape[0])
# cumulate the daily fatalities
df_fatalities['total'] = df_fatalities['total'].cumsum()
return df_fatalities
# -*- coding: utf-8 -*-
""" Documentation
"""
__author__ = "M.Charpentier, E. Vazquez"
__copyright__ = "CentraleSupelec, 2020"
__license__ = "MIT"
__maintainer__ = "E. Vazquez"
__email__ = "emmanuel.vazquez@centralesupelec.fr"
__status__ = "alpha"
import numpy as np
def log_likelihood(theta, data, model, threshold=5):
'''
Inputs
data = [T,Y]
theta = [R0, Tinf, Tinc, pfatal]
param = [N, Gamma, mu, sigma]
Outputs
lP = log_likelihood logp(Y|theta)
Y = simulated vector
'''
# Observations
y_obs = data['yobs'].values.reshape(-1)
sigma_obs = data['sigma'].values.reshape(-1)
# if number of fatalities < 10 the model is innacurate
k0 = 0
while y_obs[k0] < threshold:
k0 += 1
# Simulation
model.simulate(theta)
y_pred = model.y_from_tobs()
# pdb.set_trace()
loglik = -1 / 2 * np.sum(1 / sigma_obs[k0:] ** 2 * (np.log10(y_obs[k0:]) - np.log10(y_pred[k0:])) ** 2)
return loglik
def log_prior_SEIRD(theta):
# intervals correct?
R0 = theta[0] # Basic Reproduction Rate
Tinf = theta[1] # Infection Time
Tinc = theta[2] # Incubation Time
log10pfatal = theta[3] # Death proportion for I compartment
t0 = theta[4] # Start time
if R0 < 0 or R0 > 20:
return -np.inf
elif Tinf < 1 or Tinf > 20:
return -np.inf
elif Tinc < 1 or Tinc > 20:
return -np.inf
elif log10pfatal < -6 or log10pfatal > -1:
return -np.inf
elif t0 < 1 or t0 > 100:
return -np.inf
else:
return 0
def log_prior_SEIRD_with_cutoff(theta):
# intervals correct?
R0 = theta[0] # Basic Reproduction Rate
beta_cut = theta[1] # Reproduction Rate during lockdown
Tinf = theta[2] # Infection Time
Tinc = theta[3] # Incubation Time
log10pfatal = theta[4] # Death proportion for I compartment
t0 = theta[5] # Start time
if R0 < 0 or R0 > 200:
return -np.inf
elif beta_cut < 0 or beta_cut > 1:
return -np.inf
elif Tinf < 1 or Tinf > 30:
return -np.inf
elif Tinc < 1 or Tinc > 10:
return -np.inf
elif log10pfatal < -5 or log10pfatal > -2:
return -np.inf
elif t0 < 1 or t0 > 100:
return -np.inf
else:
return 0
def log_prior(theta, model):
if model.model_type == 'SEIRD':
return log_prior_SEIRD(theta)
elif model.model_type == 'SEIRD_with_cutoff':
return log_prior_SEIRD_with_cutoff(theta)
def log_posterior(theta, data, model, threshold=5):
lp = log_prior(theta, model)
if not np.isfinite(lp):
return -np.inf
else:
ll = log_likelihood(theta, data, model, threshold)
return ll + lp
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Documentation
"""
__author__ = "R. Adda, M.Charpentier, E. Vazquez"
__copyright__ = "CentraleSupelec, 2020"
__license__ = "MIT"
__maintainer__ = "E. Vazquez"
__email__ = "emmanuel.vazquez@centralesupelec.fr"
__status__ = "alpha"
import math
import matplotlib.pyplot as plt
import numpy as np
from scipy.integrate import ode
def SEIRD(t, x, theta, const_params):
# Computes...
#
# Inputs:
# t
# x
# theta = [R, T_inf, T_inc , pfatal]
# ...
# Output:
# dx
# ...
# ...
R0 = theta[0] # Basic Reproduction Rate
Tinf = theta[1] # Infection Time
Tinc = theta[2] # Incubation Time
pfatal = math.pow(10, theta[3]) # Death proportion for I compartment
N = const_params[0]
Gamma = const_params[1]
mu = const_params[2]
S = x[0]
E = x[1]
I = x[2]
R = x[3]
D = x[4]
gamma = 1 / Tinf
a = 1 / Tinc
beta = gamma * R0
dS = Gamma - mu * S - beta / N * I * S
dE = beta / N * I * S - (mu + a) * E
dI = a * E - gamma * (1 + pfatal) * I
dR = gamma * I - mu * R
dD = gamma * pfatal * I
dx = [dS, dE, dI, dR, dD]
return dx
def SEIRD_with_cutoff(t, x, theta, const_params):
# Computes...
#
# Inputs:
# t
# x
# theta = [R, beta_cut, T_inf, T_inc , pfatal]
# ...
# Output:
# dx
# ...
# ...
R0 = theta[0] # Basic Reproduction Rate
beta_cut = theta[1]
Tinf = theta[2] # Infection Time
Tinc = theta[3] # Incubation Time
pfatal = pow(10, theta[4]) # Death proportion for I compartment
N = const_params[0]
Gamma = const_params[1]
mu = const_params[2]
cutoff_time = const_params[3]
lift_time = const_params[4]
S = x[0]
E = x[1]
I = x[2]
R = x[3]
D = x[4]
gamma = 1 / Tinf
a = 1 / Tinc
if t >= cutoff_time and t < lift_time:
beta = beta_cut * gamma * R0
else:
beta = gamma * R0
dS = Gamma - mu * S - beta / N * I * S
dE = beta / N * I * S - (mu + a) * E
dI = a * E - gamma * (1 + pfatal) * I
dR = gamma * I - mu * R
dD = gamma * pfatal * I
dx = [dS, dE, dI, dR, dD]
return dx
class model:
'''
Attributes
model_type :
f : RHS of the ODE
...
...
theta : parameters
t : time axis for simulation
x : [S (Susceptible), E (Exposed), I (Infectious), R (Recovered), D (Death)] along time
Methods
simulate
...
plot
'''
def __init__(self, config, tobs):
# Initialization
# 1. Model choice
self.set_model_type(config)
# 2. set regional parameters
self.set_regional_params(config)
# 3. simulation parameters
self.set_simulation_params(config)
# 4. build t and preallocate x
self.prepare_simulations(config, tobs)
def set_model_type(self, config):
# Model type
self.model_type = config['model_type']
if self.model_type == 'SEIRD':
self.f = SEIRD
self.state_dim = 5 # state dim
self.theta_dim = 5 # theta dim
elif self.model_type == 'SEIRD_with_cutoff':
self.f = SEIRD_with_cutoff
self.state_dim = 5 # state dim
self.theta_dim = 6 # theta dim
else:
raise('model type is not implemented')
def set_regional_params(self, config):
# Regional parameters
if self.model_type == 'SEIRD':
self.regional_params = [config['N'], config['Gamma'], config['mu']]
elif self.model_type == 'SEIRD_with_cutoff':
self.regional_params = [config['N'], config['Gamma'], config['mu'],
config['cutoff_time'],
config['lift_time']]
def set_simulation_params(self, config):
# Simulation parameters
self.time_step = config['sim_step']
self.sim_duration = config['sim_duration']
def prepare_simulations(self, config, tobs):
# build t and preallocate x
self.t = np.arange(0, self.sim_duration, self.time_step)
self.x = np.zeros([self.t.shape[0], self.state_dim]) # preallocation
# start date, assume a a datetime object for t0_ref and convert to numpy datetime object
self.t0_refdate = np.datetime64(config['t0_refdate'])
# assume a pandas dataframe of timestamp objects and convert to numpy datetime objects
tobs_rel = [tobs[i] - self.t0_refdate for i in range(tobs.shape[0])]
# pdb.set_trace()