Skip to content
Snippets Groups Projects
Commit e880984b authored by Hermange Gurvan's avatar Hermange Gurvan
Browse files

chap 4 - Annexe - script

parent 90321898
Branches main
No related tags found
No related merge requests found
#################################
### Script Julia ###
### Model ###
#################################
@everywhere using DataFrames, CSV # To Load data
@everywhere using Distributions, Random
@everywhere using BenchmarkTools
@everywhere using MarkovProcesses # Statistical framework to perform SMC-ABC
@everywhere using Distributions, Random
@everywhere using StatsBase
@everywhere using SpecialFunctions
@everywhere using JLD2, HDF5, FileIO
#include("/home/gurvan/Documents/framework/bayesian-inference/core/environment.jl")
include("/workdir/hermangeg/bayesian-inference/core/environment.jl")
### Implementation - Generalized Gamma ###
@everywhere struct GeneralizedGamma <: ContinuousUnivariateDistribution
a::Float64
d::Float64
p::Float64
end
@everywhere function GeneralizedGamma(a,d,p)
GeneralizedGamma(a,d,p)
end
@everywhere function param_a_towards_mu(a,d,p)
mu = log(a) + log(d/p)/p
sig = 1/sqrt(p*d)
Q = sqrt(p/d)* sign(p)
return mu,sig,Q
end
@everywhere function param_mu_toward_a(mu,sig,Q)
d = 1/(sig*Q)
p = Q/sig
a =abs(Q)^(2 / p) * exp(mu)
return a,d,p
end
@everywhere function Distributions.pdf(distr::GeneralizedGamma,x::Real)
a = distr.a
d = distr.d
p = distr.p
return sign(p)*(p/a^d)/gamma(d/p) * x^(d-1) * exp(-(x/a)^p)
end
@everywhere function Distributions.cdf(distr::GeneralizedGamma,x::Real)
a = distr.a
d = distr.d
p = distr.p
d_gamma = Gamma(d/p,1)
return 1 - cdf(d_gamma, (x/a)^p)
end
# Use the algorithm given in the documentation pages for the R package flexsurv.
@everywhere function Distributions.rand(distr::GeneralizedGamma)
mu, sig, Q = param_a_towards_mu(distr.a,distr.d,distr.p)
Qs = Q^2
gamma_deviate = rand(Distributions.Gamma(1 / Q^2, 1)) # only saves 10 or so percent time.
w = log(Qs * gamma_deviate) / Q
return exp(mu + sig * w)
end
##################
dummy = @network_model begin
HSC: (HSC => 2HSC, p1_1*sig)
MPP: (MPP => 2MPP, p2_2*mu2)
HPC: (HPC => 2HPC, p3_3*mu3)
CD34neg: (CD34neg => 2CD34neg, mu4*p_link_timing*p_link_diff*rho_timing*rho_diff )
end "Branching process for hematopoieisis"
@everywhere nb_types = 4
@everywhere nb_gen_max = 6
@everywhere mutable struct ComplexModel <: Model
network_model::ContinuousTimeModel
type_depart_exp::Vector{Int64}
t_max_exp::Vector{Float64}
end
@everywhere struct ComplexTrajectory <: AbstractTrajectory
T_incucyte::Matrix{Int64}
mat_multigen::Array{Any}
end
@everywhere function div_daughter(t_app, T_max, mother_type, gen_mother,
mat_transition, rho_diff, p_link_diff,
times_v1, times_v2, rho_timing, p_link_timing,
arr_T, arr_type)
#t_app has to be < T_max
#Which type for the daughter cells ?
daughter1 = sample(collect(1:nb_types), Weights(mat_transition[mother_type,:]))
daughter2 = sample(collect(1:nb_types), Weights(mat_transition[mother_type,:]))
if p_link_diff > 0.5
if rand() <= rho_diff
daughter2 = daughter1
end
end
arr_type[min(gen_mother, nb_gen_max), mother_type] -= 1
arr_type[min(gen_mother+1, nb_gen_max), daughter1] += 1
arr_type[min(gen_mother+1, nb_gen_max), daughter2] += 1
#Division of daughter cells
sig1 = times_v2[daughter1]
#sig2 = times_v2[daughter2]
if p_link_timing > 0.5 && rho_timing > 0 && sig1^2 > 0 #&& daughter1 == daughter2
M = 0.5 .*(sig1^2 .* [1.0 rho_timing ; rho_timing 1.0] + (sig1^2 .* [1.0 rho_timing ; rho_timing 1.0])')
distrib = MvLogNormal( [times_v1[daughter1],times_v1[daughter2]], M )
(T1,T2) = rand(distrib) .+ t_app
else
T1 = t_app + rand(LogNormal(times_v1[daughter1], times_v2[daughter1]))
T2 = t_app + rand(LogNormal(times_v1[daughter2], times_v2[daughter2]))
end
if gen_mother == 1
arr_T[2] = min(T1,T2)
arr_T[3] = max(T1,T2)
elseif gen_mother == 2
if arr_T[4] == 0.0
arr_T[4] = T1
arr_T[5] = T2
else
arr_T[6] = T1
arr_T[7] = T2
end
end
if T1 < T_max
div_daughter(T1, T_max, daughter1, gen_mother+1,
mat_transition, rho_diff, p_link_diff,
times_v1, times_v2, rho_timing, p_link_timing,
arr_T, arr_type)
end
if T2 < T_max
div_daughter(T2, T_max, daughter2, gen_mother+1,
mat_transition, rho_diff, p_link_diff,
times_v1, times_v2, rho_timing, p_link_timing,
arr_T, arr_type)
end
end
@everywhere function recover_multigen(arr_type)
nb_cells = sum(arr_type)
recovery_rate = 0.71
for i in 1:size(arr_type,1)
for j in 1:size(arr_type,2)
if arr_type[i,j] != 0
arr_type[i,j] = rand(Binomial(arr_type[i,j],recovery_rate))
end
end
end
return arr_type
end
@everywhere function censor_times(arr_T)
arr_T_censor = zeros(Int64,length(arr_T))
for i in 1:length(arr_T)
if arr_T[i] == 0.0
arr_T_censor[i] = 0
else
arr_T_censor[i] = ceil(Int,arr_T[i])
end
end
return arr_T_censor
end
#Simulation
@everywhere function simulate(model::ComplexModel;
p::Union{Nothing,AbstractVector{Float64}} = nothing)
p_sim = (model.network_model).p
param_to_idx = model.network_model.map_param_idx
#Building vectors and matrix transition
#First daughter cell
mat_transition = zeros(nb_types,nb_types)
#HSC ->
mat_transition[1,1] = p_sim[param_to_idx[:p1_1]]
mat_transition[1,2] = (1.0 - mat_transition[1,1])/3
mat_transition[1,3] = (1.0 - mat_transition[1,1])/3
p1_4 = 1.0 - mat_transition[1,1] - mat_transition[1,2] - mat_transition[1,3]
mat_transition[1,4] = p1_4
#MPP -> ...
mat_transition[2,2] = p_sim[param_to_idx[:p2_2]]
mat_transition[2,3] = (1.0 - mat_transition[2,2])/2
p2_4 = 1.0 - mat_transition[2,2] - mat_transition[2,3]
mat_transition[2,4] = p2_4
#HPC -> ...
mat_transition[3,3] = p_sim[param_to_idx[:p3_3]]
p3_4 = 1.0 - mat_transition[3,3]
mat_transition[3,4] = p3_4
#CD34-
mat_transition[4,4] = 1.0
times_v1 = [
p_sim[param_to_idx[:mu2]],
p_sim[param_to_idx[:mu2]],
p_sim[param_to_idx[:mu3]],
p_sim[param_to_idx[:mu4]],
]
times_v2 = [
p_sim[param_to_idx[:sig]],
p_sim[param_to_idx[:sig]],
p_sim[param_to_idx[:sig]],
p_sim[param_to_idx[:sig]],
]
nb_wells = length(model.t_max_exp)
mat_T = zeros(Int64,nb_wells,8) #7 -> T1,T2.1, ..., T3.4+colony_size
mat_multigen = Array{Any}(undef, nb_wells)
for i in 1:nb_wells
ini = model.type_depart_exp[i]
arr_T = zeros(7)
arr_type = zeros(Int64,nb_gen_max, nb_types)
arr_type[1,ini] = 1
#First division
t1 = 0.0
if ini == 1 #HSC
a_HSC,d_HSC, p_HSC = param_mu_toward_a(3.9054681, 0.1695595, -0.4932141)
distr_HSC = GeneralizedGamma(a_HSC,d_HSC, p_HSC)
t1 = rand(distr_HSC)
elseif ini == 2 #MPP
t1 = rand(LogNormal(3.84,0.196))
else #HPC
t1 = rand(LogNormal(3.7,0.236))
end
arr_T[1] = t1
#Then
if t1 < model.t_max_exp[i]
div_daughter(t1, model.t_max_exp[i], ini, 1,
mat_transition, p_sim[param_to_idx[:rho_diff]],p_sim[param_to_idx[:p_link_diff]],
times_v1, times_v2, p_sim[param_to_idx[:rho_timing]],p_sim[param_to_idx[:p_link_timing]],
arr_T, arr_type)
end
mat_T[i,1:7] = censor_times(arr_T)
mat_T[i,8] = sum(arr_type)
mat_multigen[i] = recover_multigen(copy(arr_type))
end
return ComplexTrajectory(mat_T, mat_multigen)
end
@everywhere import MarkovProcesses: get_proba_model
@everywhere get_proba_model(model::ComplexModel) = model.network_model
@everywhere N_wells = 1000
modelHematopo = ComplexModel(dummy,
vcat([1 for i in 1:N_wells], [2 for i in 1:N_wells],[3 for i in 1:N_wells]), #type_depart_exp
[96 for i in 1:3*N_wells] #t_max_exp
)
### Estimation ###
@everywhere function compute_ss(T_incucyte, mat_multigen)
ss = zeros(11)
### HSC ###
T1 = T_incucyte[1:N_wells,1]
T2_1 = T_incucyte[1:N_wells,2] #Value 0 means >96h
T2_2 = T_incucyte[1:N_wells,3]
T1_pos = T1[T1.>0]
T2_1_pos = T2_1[T2_1.>0]
T2_2_pos = T2_2[T2_2.>0]
ss[1] = cor(T2_1[T2_2.>0],T2_2_pos)
ss[3] = cor(T2_1[T2_2.>0].-T1[T2_2.>0],T2_2_pos.-T1[T2_2.>0])
mat = mat_multigen[1:N_wells,1]
rep_gen = zeros(length(mat),nb_gen_max)
nb_gen_0 = zeros(length(mat))
nb_cells_effectif = zeros(length(mat))
for i in 1:length(mat)
rep_gen[i,:] = sum(mat[i],dims=2)'
end
ss[5] = mean(rep_gen[:,2]) #HSC
#Type
rep_type = zeros(length(mat),4)
nb_type_0 = zeros(length(mat))
for i in 1:length(mat)
rep_type[i,:] = sum(mat[i],dims=1)
nb_type_0[i] = sum(rep_type[i,:] .== 0)
end
ss[8] = std(rep_type[:,1])
ss[11] = mean(nb_type_0.==3)
#### MPP ####
T1 = T_incucyte[N_wells+1:2*N_wells,1]
T2_1 = T_incucyte[N_wells+1:2*N_wells,2] #Value 0 means >96h
T2_2 = T_incucyte[N_wells+1:2*N_wells,3]
T1_pos = T1[T1.>0]
T2_1_pos = T2_1[T2_1.>0]
T2_2_pos = T2_2[T2_2.>0]
ss[4] = cor(T2_1[T2_2.>0].-T1[T2_2.>0],T2_2_pos.-T1[T2_2.>0])
mat = mat_multigen[N_wells+1:2*N_wells,1]
rep_gen = zeros(length(mat),nb_gen_max)
nb_gen_0 = zeros(length(mat))
nb_cells_effectif = zeros(length(mat))
for i in 1:length(mat)
rep_gen[i,:] = sum(mat[i],dims=2)'
nb_gen_0[i] = sum(rep_gen[i,:] .== 0)
nb_cells_effectif[i] = sum(rep_gen[i,:]./collect(1:6))
end
sum_cells = sum(rep_gen,dims=2)
ss[6] = mean(nb_gen_0.==3)
ss[7] = std(nb_cells_effectif)
#Type
rep_type = zeros(length(mat),4)
nb_type_0 = zeros(length(mat))
for i in 1:length(mat)
rep_type[i,:] = sum(mat[i],dims=1)
nb_type_0[i] = sum(rep_type[i,:] .== 0)
end
ss[9] = mean(rep_type[:,4].==sum_cells)
ss[10] = mean(nb_type_0.==1)
### HPC ###
T1 = T_incucyte[2*N_wells+1:end,1]
T2_1 = T_incucyte[2*N_wells+1:end,2] #Value 0 means >96h
T2_1_pos = T2_1[T2_1.>0]
ss[2] = std(T2_1_pos.-T1[T2_1.>0])
std_ss = [0.052487633317557174,
1.393397653056375 ,
0.15082454528887287 ,
0.15082454528887287 ,
0.019604757873945413,
0.13145691847064636 ,
1.5122084649570302 ,
0.30432294059137266 ,
0.3061549672300609 ,
0.11732217503815727 ,
0.28111916786340413 ]
return ss, std_ss
end
# Distance function for hybrid_invasion model
@everywhere function hybrid_dist_obs(vec_sim, vec_observations)
ss_obs = vec_observations[1]
simu = vec_sim[1]
ss_sim, std_ss = compute_ss(simu.T_incucyte, simu.mat_multigen)
return mean_error = sqrt(sum( ((ss_sim .- ss_obs)./std_ss).^2))
end
#Custom joint prior distribution for our paramaters
@everywhere struct PriorDistribution <: ContinuousMultivariateDistribution end
@everywhere function Distributions.length(d::PriorDistribution)
return 11 #Number of parameters
end
@everywhere function Distributions.rand(d::PriorDistribution)
p1_1 = rand(Uniform(0.0,1))
sig = rand(Uniform(0.1,0.4))
p2_2 = rand(Uniform(0.0,1))
mu_2 = rand(Uniform(2.2,3.2))
p3_3 = rand(Uniform(0.0,1))
mu_3 = rand(Uniform(2.2,3.2))
mu_4 = rand(Uniform(2.2,3.2))
p_link_timing = rand(Uniform(0.0,1))
p_link_diff = rand(Uniform(0.0,1))
rho_timing = rand(Uniform(0.0,1))
rho_diff = rand(Uniform(0.0,1))
return [p1_1,
sig,
p2_2,mu_2,
p3_3,mu_3,
mu_4,p_link_timing,p_link_diff,rho_timing,rho_diff]
end
@everywhere function Distributions.rand!(d::PriorDistribution,vec_p::Array{Float64,1})
vec_p[:] = rand(d)
end
@everywhere function Distributions.pdf(d::PriorDistribution, X::AbstractArray{T,1} where T=11)
lim_inf = [0.0,0.1,0.0,2.2,0.0,2.2,2.2,0.0,0.0,0.0,0.0]
lim_sup = [1,0.4,1,3.2,1,3.2,3.2,1,1,1,1]
if prod(X .>= lim_inf) * prod(X .<= lim_sup)==1.0
return 1.0/prod(lim_sup .- lim_inf)
else
return 0.0
end
end
@everywhere function Distributions.insupport(d::PriorDistribution, x::Array{Float64,1})
return pdf(d,x) != 0 # in support if non zero
end
parametric_model = ParametricModel(modelHematopo, [:p1_1,
:sig,
:p2_2,:mu2,
:p3_3,:mu3,
:mu4,:p_link_timing,:p_link_diff,:rho_timing,:rho_diff], PriorDistribution())
@everywhere vec_observations = [
0.9619724128320803 ,
4.0896317302459595 ,
0.8282983755046477 ,
0.8287005006329901 ,
0.05732484076433121,
0.11042944785276074,
0.9333702560294956 ,
0.9598335366591979 ,
0.5030674846625767 ,
0.07975460122699386,
0.5605095541401274 ,
];
### Load LHS and best params values
theta_LHS = Matrix(CSV.read("/workdir/hermangeg/curie/CMAES_thesis_ale/theta_big.csv", DataFrame, header=false));
res_sum_stats = Matrix(CSV.read("/workdir/hermangeg/curie/CMAES_thesis_ale/summary_stats.csv", DataFrame, header=false))
df = DataFrame(d=res_sum_stats[:,end], o=collect(1:size(theta_LHS,1)))
sort!(df)
##CMAES
function model(X)
end
function noiseModel(X)
set_param!(modelHematopo, X["parameters"][1:11]);
epsilon = hybrid_dist_obs([simulate(modelHematopo)], [vec_observations])
return X["likelihood"] = exp(-epsilon^2)
end
e = newEnvironment()
lim_inf = [0.0,0.1,0.0,2.2,0.0,2.2,2.2,0.0,0.0,0.0,0.0]
lim_sup = [1,0.4,1,3.2,1,3.2,3.2,1,1,1,1]
for i in 1:11
push!(e["model"]["parameters"], Uniform(lim_inf[i],lim_sup[i])) #1 beta
end
#We indicate what is our computational model (name of the function)
e["model"]["computational model"] = model
e["likelihood"]["type"] = "perso"
e["likelihood"]["computational likelihood"] = noiseModel;
C_prior = 1.0/prod(lim_sup .- lim_inf)
e["task"] = "parameter estimation"
e["algo"]["type"] = "CMAES";
covMat = Matrix(1I, 11, 11);
e["algo"]["variance"] = covMat;
e["algo"]["sigma"] = 0.05
e["algo"]["min gen tol"] = 100
e["algo"]["max iter"] = 100
e["algo"]["lambda"] = 300
e["algo"]["ratio limite lambda"] = 1
e["algo"]["start"] = "distribution"
e["algo"]["sav_all_gen"] = true
for i in 1:1
theta_ini = theta_LHS[df[i,:o],:]
arr_res = zeros(11*2+1+1+2)
arr_res[1:11] = theta_ini
arr_res[12] = i
arr_res[13] = df[i,:d]
e["algo"]["distribution"] = theta_ini
s = rand(collect(1:1000))
arr_res[14] = s
Random.seed!(s)
@time run(e);
arr_res[15:15+10] = e["results"]["m"]
# Save Full res
save(string("/workdir/hermangeg/curie/CMAES_thesis_gurvan/res/extRes_CMAES_",i,"_seed",s,".jld2"), e["results"]["sample"])
dist = zeros(300)
set_param!(modelHematopo, e["results"]["m"]);
for ii in 1:300
epsilon = hybrid_dist_obs([simulate(modelHematopo)], [vec_observations])
dist[ii] = epsilon
end
arr_res[end]= median(dist)
CSV.write(string("/workdir/hermangeg/curie/CMAES_thesis_gurvan/res/res_CMAES_",i,"_seed",s,".csv"), DataFrame(v=arr_res), header=false)
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment