diff --git a/chapter4/C-script/optimisationCMAES.jl b/chapter4/C-script/optimisationCMAES.jl new file mode 100644 index 0000000000000000000000000000000000000000..1b9e0a6620b609453b791c36a8a42c49819f1129 --- /dev/null +++ b/chapter4/C-script/optimisationCMAES.jl @@ -0,0 +1,556 @@ +################################# +### Script Julia ### +### Model ### +################################# +@everywhere using DataFrames, CSV # To Load data +@everywhere using Distributions, Random +@everywhere using BenchmarkTools +@everywhere using MarkovProcesses # Statistical framework to perform SMC-ABC +@everywhere using Distributions, Random +@everywhere using StatsBase +@everywhere using SpecialFunctions +@everywhere using JLD2, HDF5, FileIO +#include("/home/gurvan/Documents/framework/bayesian-inference/core/environment.jl") +include("/workdir/hermangeg/bayesian-inference/core/environment.jl") + + +### Implementation - Generalized Gamma ### + +@everywhere struct GeneralizedGamma <: ContinuousUnivariateDistribution + a::Float64 + d::Float64 + p::Float64 +end + +@everywhere function GeneralizedGamma(a,d,p) + GeneralizedGamma(a,d,p) +end + +@everywhere function param_a_towards_mu(a,d,p) + mu = log(a) + log(d/p)/p + sig = 1/sqrt(p*d) + Q = sqrt(p/d)* sign(p) + + return mu,sig,Q + +end + +@everywhere function param_mu_toward_a(mu,sig,Q) + d = 1/(sig*Q) + p = Q/sig + a =abs(Q)^(2 / p) * exp(mu) + return a,d,p +end + +@everywhere function Distributions.pdf(distr::GeneralizedGamma,x::Real) + a = distr.a + d = distr.d + p = distr.p + return sign(p)*(p/a^d)/gamma(d/p) * x^(d-1) * exp(-(x/a)^p) +end + +@everywhere function Distributions.cdf(distr::GeneralizedGamma,x::Real) + a = distr.a + d = distr.d + p = distr.p + d_gamma = Gamma(d/p,1) + return 1 - cdf(d_gamma, (x/a)^p) +end + +# Use the algorithm given in the documentation pages for the R package flexsurv. +@everywhere function Distributions.rand(distr::GeneralizedGamma) + mu, sig, Q = param_a_towards_mu(distr.a,distr.d,distr.p) + Qs = Q^2 + gamma_deviate = rand(Distributions.Gamma(1 / Q^2, 1)) # only saves 10 or so percent time. + w = log(Qs * gamma_deviate) / Q + return exp(mu + sig * w) +end + +################## + + + +dummy = @network_model begin + HSC: (HSC => 2HSC, p1_1*sig) + MPP: (MPP => 2MPP, p2_2*mu2) + HPC: (HPC => 2HPC, p3_3*mu3) + CD34neg: (CD34neg => 2CD34neg, mu4*p_link_timing*p_link_diff*rho_timing*rho_diff ) + end "Branching process for hematopoieisis" + +@everywhere nb_types = 4 +@everywhere nb_gen_max = 6 + +@everywhere mutable struct ComplexModel <: Model + network_model::ContinuousTimeModel + type_depart_exp::Vector{Int64} + t_max_exp::Vector{Float64} +end + +@everywhere struct ComplexTrajectory <: AbstractTrajectory + T_incucyte::Matrix{Int64} + mat_multigen::Array{Any} +end + +@everywhere function div_daughter(t_app, T_max, mother_type, gen_mother, + mat_transition, rho_diff, p_link_diff, + times_v1, times_v2, rho_timing, p_link_timing, + arr_T, arr_type) + #t_app has to be < T_max + + #Which type for the daughter cells ? + daughter1 = sample(collect(1:nb_types), Weights(mat_transition[mother_type,:])) + daughter2 = sample(collect(1:nb_types), Weights(mat_transition[mother_type,:])) + if p_link_diff > 0.5 + if rand() <= rho_diff + daughter2 = daughter1 + end + end + + arr_type[min(gen_mother, nb_gen_max), mother_type] -= 1 + arr_type[min(gen_mother+1, nb_gen_max), daughter1] += 1 + arr_type[min(gen_mother+1, nb_gen_max), daughter2] += 1 + + #Division of daughter cells + sig1 = times_v2[daughter1] + #sig2 = times_v2[daughter2] + if p_link_timing > 0.5 && rho_timing > 0 && sig1^2 > 0 #&& daughter1 == daughter2 + + + M = 0.5 .*(sig1^2 .* [1.0 rho_timing ; rho_timing 1.0] + (sig1^2 .* [1.0 rho_timing ; rho_timing 1.0])') + distrib = MvLogNormal( [times_v1[daughter1],times_v1[daughter2]], M ) + + (T1,T2) = rand(distrib) .+ t_app + else + T1 = t_app + rand(LogNormal(times_v1[daughter1], times_v2[daughter1])) + T2 = t_app + rand(LogNormal(times_v1[daughter2], times_v2[daughter2])) + end + + if gen_mother == 1 + arr_T[2] = min(T1,T2) + arr_T[3] = max(T1,T2) + elseif gen_mother == 2 + if arr_T[4] == 0.0 + arr_T[4] = T1 + arr_T[5] = T2 + else + arr_T[6] = T1 + arr_T[7] = T2 + end + end + + if T1 < T_max + div_daughter(T1, T_max, daughter1, gen_mother+1, + mat_transition, rho_diff, p_link_diff, + times_v1, times_v2, rho_timing, p_link_timing, + arr_T, arr_type) + end + if T2 < T_max + div_daughter(T2, T_max, daughter2, gen_mother+1, + mat_transition, rho_diff, p_link_diff, + times_v1, times_v2, rho_timing, p_link_timing, + arr_T, arr_type) + end +end + + +@everywhere function recover_multigen(arr_type) + nb_cells = sum(arr_type) + recovery_rate = 0.71 + for i in 1:size(arr_type,1) + for j in 1:size(arr_type,2) + if arr_type[i,j] != 0 + arr_type[i,j] = rand(Binomial(arr_type[i,j],recovery_rate)) + end + end + end + return arr_type +end + +@everywhere function censor_times(arr_T) + arr_T_censor = zeros(Int64,length(arr_T)) + for i in 1:length(arr_T) + if arr_T[i] == 0.0 + arr_T_censor[i] = 0 + else + arr_T_censor[i] = ceil(Int,arr_T[i]) + end + end + return arr_T_censor +end + +#Simulation +@everywhere function simulate(model::ComplexModel; + p::Union{Nothing,AbstractVector{Float64}} = nothing) + + p_sim = (model.network_model).p + param_to_idx = model.network_model.map_param_idx + + #Building vectors and matrix transition + + #First daughter cell + mat_transition = zeros(nb_types,nb_types) + + #HSC -> + mat_transition[1,1] = p_sim[param_to_idx[:p1_1]] + mat_transition[1,2] = (1.0 - mat_transition[1,1])/3 + mat_transition[1,3] = (1.0 - mat_transition[1,1])/3 + p1_4 = 1.0 - mat_transition[1,1] - mat_transition[1,2] - mat_transition[1,3] + mat_transition[1,4] = p1_4 + + #MPP -> ... + mat_transition[2,2] = p_sim[param_to_idx[:p2_2]] + mat_transition[2,3] = (1.0 - mat_transition[2,2])/2 + p2_4 = 1.0 - mat_transition[2,2] - mat_transition[2,3] + mat_transition[2,4] = p2_4 + + #HPC -> ... + mat_transition[3,3] = p_sim[param_to_idx[:p3_3]] + p3_4 = 1.0 - mat_transition[3,3] + mat_transition[3,4] = p3_4 + + #CD34- + mat_transition[4,4] = 1.0 + + + + + times_v1 = [ + p_sim[param_to_idx[:mu2]], + p_sim[param_to_idx[:mu2]], + p_sim[param_to_idx[:mu3]], + p_sim[param_to_idx[:mu4]], + ] + + times_v2 = [ + p_sim[param_to_idx[:sig]], + p_sim[param_to_idx[:sig]], + p_sim[param_to_idx[:sig]], + p_sim[param_to_idx[:sig]], + ] + + nb_wells = length(model.t_max_exp) + mat_T = zeros(Int64,nb_wells,8) #7 -> T1,T2.1, ..., T3.4+colony_size + mat_multigen = Array{Any}(undef, nb_wells) + + for i in 1:nb_wells + ini = model.type_depart_exp[i] + arr_T = zeros(7) + arr_type = zeros(Int64,nb_gen_max, nb_types) + arr_type[1,ini] = 1 + + #First division + t1 = 0.0 + if ini == 1 #HSC + a_HSC,d_HSC, p_HSC = param_mu_toward_a(3.9054681, 0.1695595, -0.4932141) + distr_HSC = GeneralizedGamma(a_HSC,d_HSC, p_HSC) + t1 = rand(distr_HSC) + elseif ini == 2 #MPP + t1 = rand(LogNormal(3.84,0.196)) + else #HPC + t1 = rand(LogNormal(3.7,0.236)) + end + arr_T[1] = t1 + + + + #Then + if t1 < model.t_max_exp[i] + div_daughter(t1, model.t_max_exp[i], ini, 1, + mat_transition, p_sim[param_to_idx[:rho_diff]],p_sim[param_to_idx[:p_link_diff]], + times_v1, times_v2, p_sim[param_to_idx[:rho_timing]],p_sim[param_to_idx[:p_link_timing]], + arr_T, arr_type) + + end + mat_T[i,1:7] = censor_times(arr_T) + mat_T[i,8] = sum(arr_type) + mat_multigen[i] = recover_multigen(copy(arr_type)) + end + + return ComplexTrajectory(mat_T, mat_multigen) + +end + +@everywhere import MarkovProcesses: get_proba_model +@everywhere get_proba_model(model::ComplexModel) = model.network_model + + + + +@everywhere N_wells = 1000 +modelHematopo = ComplexModel(dummy, + vcat([1 for i in 1:N_wells], [2 for i in 1:N_wells],[3 for i in 1:N_wells]), #type_depart_exp + [96 for i in 1:3*N_wells] #t_max_exp + ) + +### Estimation ### +@everywhere function compute_ss(T_incucyte, mat_multigen) + + ss = zeros(11) + + ### HSC ### + T1 = T_incucyte[1:N_wells,1] + T2_1 = T_incucyte[1:N_wells,2] #Value 0 means >96h + T2_2 = T_incucyte[1:N_wells,3] + + T1_pos = T1[T1.>0] + T2_1_pos = T2_1[T2_1.>0] + T2_2_pos = T2_2[T2_2.>0] + + + + ss[1] = cor(T2_1[T2_2.>0],T2_2_pos) + ss[3] = cor(T2_1[T2_2.>0].-T1[T2_2.>0],T2_2_pos.-T1[T2_2.>0]) + + mat = mat_multigen[1:N_wells,1] + rep_gen = zeros(length(mat),nb_gen_max) + nb_gen_0 = zeros(length(mat)) + nb_cells_effectif = zeros(length(mat)) + for i in 1:length(mat) + rep_gen[i,:] = sum(mat[i],dims=2)' + end + + + ss[5] = mean(rep_gen[:,2]) #HSC + + #Type + + rep_type = zeros(length(mat),4) + nb_type_0 = zeros(length(mat)) + for i in 1:length(mat) + rep_type[i,:] = sum(mat[i],dims=1) + nb_type_0[i] = sum(rep_type[i,:] .== 0) + end + + ss[8] = std(rep_type[:,1]) + ss[11] = mean(nb_type_0.==3) + + #### MPP #### + T1 = T_incucyte[N_wells+1:2*N_wells,1] + T2_1 = T_incucyte[N_wells+1:2*N_wells,2] #Value 0 means >96h + T2_2 = T_incucyte[N_wells+1:2*N_wells,3] + + T1_pos = T1[T1.>0] + T2_1_pos = T2_1[T2_1.>0] + T2_2_pos = T2_2[T2_2.>0] + + + ss[4] = cor(T2_1[T2_2.>0].-T1[T2_2.>0],T2_2_pos.-T1[T2_2.>0]) + + mat = mat_multigen[N_wells+1:2*N_wells,1] + rep_gen = zeros(length(mat),nb_gen_max) + nb_gen_0 = zeros(length(mat)) + nb_cells_effectif = zeros(length(mat)) + for i in 1:length(mat) + rep_gen[i,:] = sum(mat[i],dims=2)' + nb_gen_0[i] = sum(rep_gen[i,:] .== 0) + nb_cells_effectif[i] = sum(rep_gen[i,:]./collect(1:6)) + end + sum_cells = sum(rep_gen,dims=2) + + ss[6] = mean(nb_gen_0.==3) + ss[7] = std(nb_cells_effectif) + + #Type + + rep_type = zeros(length(mat),4) + nb_type_0 = zeros(length(mat)) + for i in 1:length(mat) + rep_type[i,:] = sum(mat[i],dims=1) + nb_type_0[i] = sum(rep_type[i,:] .== 0) + end + + ss[9] = mean(rep_type[:,4].==sum_cells) + ss[10] = mean(nb_type_0.==1) + + ### HPC ### + T1 = T_incucyte[2*N_wells+1:end,1] + T2_1 = T_incucyte[2*N_wells+1:end,2] #Value 0 means >96h + + T2_1_pos = T2_1[T2_1.>0] + + ss[2] = std(T2_1_pos.-T1[T2_1.>0]) + + std_ss = [0.052487633317557174, + 1.393397653056375 , + 0.15082454528887287 , + 0.15082454528887287 , + 0.019604757873945413, + 0.13145691847064636 , + 1.5122084649570302 , + 0.30432294059137266 , + 0.3061549672300609 , + 0.11732217503815727 , + 0.28111916786340413 ] + + return ss, std_ss +end + +# Distance function for hybrid_invasion model +@everywhere function hybrid_dist_obs(vec_sim, vec_observations) + ss_obs = vec_observations[1] + simu = vec_sim[1] + + ss_sim, std_ss = compute_ss(simu.T_incucyte, simu.mat_multigen) + + + return mean_error = sqrt(sum( ((ss_sim .- ss_obs)./std_ss).^2)) + +end + + +#Custom joint prior distribution for our paramaters +@everywhere struct PriorDistribution <: ContinuousMultivariateDistribution end + +@everywhere function Distributions.length(d::PriorDistribution) + return 11 #Number of parameters +end + + + + +@everywhere function Distributions.rand(d::PriorDistribution) + p1_1 = rand(Uniform(0.0,1)) + sig = rand(Uniform(0.1,0.4)) + p2_2 = rand(Uniform(0.0,1)) + mu_2 = rand(Uniform(2.2,3.2)) + p3_3 = rand(Uniform(0.0,1)) + mu_3 = rand(Uniform(2.2,3.2)) + mu_4 = rand(Uniform(2.2,3.2)) + p_link_timing = rand(Uniform(0.0,1)) + p_link_diff = rand(Uniform(0.0,1)) + rho_timing = rand(Uniform(0.0,1)) + rho_diff = rand(Uniform(0.0,1)) + + return [p1_1, + sig, + p2_2,mu_2, + p3_3,mu_3, + mu_4,p_link_timing,p_link_diff,rho_timing,rho_diff] +end + +@everywhere function Distributions.rand!(d::PriorDistribution,vec_p::Array{Float64,1}) + vec_p[:] = rand(d) +end + +@everywhere function Distributions.pdf(d::PriorDistribution, X::AbstractArray{T,1} where T=11) + lim_inf = [0.0,0.1,0.0,2.2,0.0,2.2,2.2,0.0,0.0,0.0,0.0] + lim_sup = [1,0.4,1,3.2,1,3.2,3.2,1,1,1,1] + + if prod(X .>= lim_inf) * prod(X .<= lim_sup)==1.0 + return 1.0/prod(lim_sup .- lim_inf) + else + return 0.0 + end + +end + +@everywhere function Distributions.insupport(d::PriorDistribution, x::Array{Float64,1}) + return pdf(d,x) != 0 # in support if non zero +end + +parametric_model = ParametricModel(modelHematopo, [:p1_1, + :sig, + :p2_2,:mu2, + :p3_3,:mu3, + :mu4,:p_link_timing,:p_link_diff,:rho_timing,:rho_diff], PriorDistribution()) + +@everywhere vec_observations = [ + 0.9619724128320803 , + 4.0896317302459595 , + 0.8282983755046477 , + 0.8287005006329901 , + 0.05732484076433121, + 0.11042944785276074, + 0.9333702560294956 , + 0.9598335366591979 , + 0.5030674846625767 , + 0.07975460122699386, + 0.5605095541401274 , +]; + + +### Load LHS and best params values +theta_LHS = Matrix(CSV.read("/workdir/hermangeg/curie/CMAES_thesis_ale/theta_big.csv", DataFrame, header=false)); + +res_sum_stats = Matrix(CSV.read("/workdir/hermangeg/curie/CMAES_thesis_ale/summary_stats.csv", DataFrame, header=false)) + +df = DataFrame(d=res_sum_stats[:,end], o=collect(1:size(theta_LHS,1))) +sort!(df) + +##CMAES + +function model(X) + +end + +function noiseModel(X) + set_param!(modelHematopo, X["parameters"][1:11]); + epsilon = hybrid_dist_obs([simulate(modelHematopo)], [vec_observations]) + return X["likelihood"] = exp(-epsilon^2) +end + +e = newEnvironment() + +lim_inf = [0.0,0.1,0.0,2.2,0.0,2.2,2.2,0.0,0.0,0.0,0.0] +lim_sup = [1,0.4,1,3.2,1,3.2,3.2,1,1,1,1] + +for i in 1:11 + push!(e["model"]["parameters"], Uniform(lim_inf[i],lim_sup[i])) #1 beta +end + +#We indicate what is our computational model (name of the function) +e["model"]["computational model"] = model + +e["likelihood"]["type"] = "perso" +e["likelihood"]["computational likelihood"] = noiseModel; + +C_prior = 1.0/prod(lim_sup .- lim_inf) + + +e["task"] = "parameter estimation" +e["algo"]["type"] = "CMAES"; + +covMat = Matrix(1I, 11, 11); +e["algo"]["variance"] = covMat; +e["algo"]["sigma"] = 0.05 +e["algo"]["min gen tol"] = 100 +e["algo"]["max iter"] = 100 +e["algo"]["lambda"] = 300 +e["algo"]["ratio limite lambda"] = 1 +e["algo"]["start"] = "distribution" +e["algo"]["sav_all_gen"] = true + +for i in 1:1 + +theta_ini = theta_LHS[df[i,:o],:] +arr_res = zeros(11*2+1+1+2) +arr_res[1:11] = theta_ini +arr_res[12] = i +arr_res[13] = df[i,:d] + + +e["algo"]["distribution"] = theta_ini +s = rand(collect(1:1000)) +arr_res[14] = s +Random.seed!(s) +@time run(e); + +arr_res[15:15+10] = e["results"]["m"] + +# Save Full res + +save(string("/workdir/hermangeg/curie/CMAES_thesis_gurvan/res/extRes_CMAES_",i,"_seed",s,".jld2"), e["results"]["sample"]) + + + +dist = zeros(300) +set_param!(modelHematopo, e["results"]["m"]); +for ii in 1:300 +epsilon = hybrid_dist_obs([simulate(modelHematopo)], [vec_observations]) +dist[ii] = epsilon +end +arr_res[end]= median(dist) +CSV.write(string("/workdir/hermangeg/curie/CMAES_thesis_gurvan/res/res_CMAES_",i,"_seed",s,".csv"), DataFrame(v=arr_res), header=false) + + +end