diff --git a/algorithms/_utils_abc.jl b/algorithms/_utils_abc.jl new file mode 100644 index 0000000000000000000000000000000000000000..09422a9954776eb1b06ec770e4205d06a37a3bd7 --- /dev/null +++ b/algorithms/_utils_abc.jl @@ -0,0 +1,118 @@ + +import StatsBase: cov, ProbabilityWeights +import Statistics: quantile +import NearestNeighbors: knn, KDTree +import Distributions: MvNormal, Categorical +import Random: rand! + +function _init_abc_automaton!(old_mat_p::Matrix{Float64}, vec_dist::Vector{Float64}, + pm::ParametricModel, str_var_aut::String) + vec_p = zeros(pm.df) + for i = eachindex(vec_dist) + draw!(vec_p, pm) + old_mat_p[:,i] = vec_p + σ = simulate(pm, vec_p) + vec_dist[i] = σ.S[str_var_aut] + end +end + +function _compute_epsilon(vec_dist::Vector{Float64}, α::Float64, + old_eps::Float64, last_eps::Float64) + u_vec_dist = unique(vec_dist) + if length(u_vec_dist) == 2 + epsilon = quantile(u_vec_dist, α) + else + epsilon = quantile(vec_dist, α) + end + if old_eps == epsilon + print("eps == old_eps, we readjust") + s_dist = sort(u_vec_dist) + max_dist = 0.0 + for d in s_dist + if (d < epsilon) && (max_dist < d) + max_dist = d + end + end + max_dist = (max_dist == 0) ? 0.9*epsilon : max_dist + epsilon = max_dist + end + epsilon = (epsilon < last_eps) ? last_eps : epsilon +end + +function _draw_param_kernel!(vec_p_prime::Vector{Float64}, + vec_p_star::SubArray{Float64,1}, old_mat_p::Matrix{Float64}, wl_old::Vector{Float64}, + mat_cov::Matrix{Float64}, tree_mat_p::Union{KDTree,Nothing}, kernel_type::String) + if kernel_type == "mvnormal" + d_mvnormal = MvNormal(vec_p_star, mat_cov) + rand!(d_mvnormal, vec_p_prime) + elseif kernel_type == "knn_mvnormal" + k = Int(round(0.25 * size(old_mat_p)[2])) + idxs, dist = knn(tree_mat_p, vec_p_star, k, true) + knn_mat_cov = 2 * cov(old_mat_p[:,idxs], ProbabilityWeights(wl_old[idxs]), 2; corrected=false) + d_knn_mvnormal = Distributions.MvNormal(vec_p_star, knn_mat_cov) + rand!(d_knn_mvnormal, vec_p_prime) + return knn_mat_cov + else + error("Unknown specified kernel") + end +end + +function _update_param!(mat_p::Matrix{Float64}, vec_dist::Vector{Float64}, + l_nbr_sim::Vector{Int}, wl_current::Vector{Float64}, idx::Int, + pm::ParametricModel, epsilon::Float64, + wl_old::Vector{Float64}, old_mat_p::Matrix{Float64}, + mat_cov::Matrix{Float64}, tree_mat_p::Union{Nothing,KDTree}, + kernel_type::String, str_var_aut::String) + d_weights = Categorical(wl_old) + dist_sim = Inf + nbr_sim = 0 + vec_p_prime = zeros(pm.df) + while dist_sim > epsilon + ind_p_star = rand(d_weights) + vec_p_star = view(old_mat_p, :, ind_p_star) + knn_mat_cov = _draw_param_kernel!(vec_p_prime, vec_p_star, old_mat_p, wl_old, mat_cov, tree_mat_p, kernel_type) + if !insupport(pm, vec_p_prime) + continue + end + σ = simulate(pm, vec_p_prime) + dist_sim = σ.S[str_var_aut] + nbr_sim += 1 + end + + if kernel_type == "mvnormal" + mat_cov_kernel = mat_cov + elseif kernel_type == "knn_mvnormal" + mat_cov_kernel = knn_mat_cov + else + error("Unknown kernel") + end + # Update + mat_p[:,idx] = vec_p_prime + vec_dist[idx] = dist_sim + l_nbr_sim[idx] = nbr_sim + _update_weight!(wl_current, idx, pm, wl_old, old_mat_p, vec_p_prime, mat_cov_kernel, kernel_type) +end + +function _update_weight!(wl_current::Vector{Float64}, idx::Int, + pm::ParametricModel, + wl_old::Vector{Float64}, old_mat_p::Matrix{Float64}, + vec_p_prime::Vector{Float64}, + mat_cov_kernel::Matrix{Float64}, + kernel_type::String) + denom = 0.0 + for j in 1:length(wl_old) + #denom += wl_old[j] * Distributions.pdf(d_normal, inv_sqrt_mat_cov * (vec_p_current - old_mat_p[:,j]))::Float64 + denom += wl_old[j] * pdf(MvNormal(old_mat_p[:,j], mat_cov_kernel), vec_p_prime)::Float64 + end + wl_current[idx] = prior_pdf(pm, vec_p_prime) / denom +end + +function effective_sample_size(wl::Vector{Float64}) + n_eff = 0.0 + wls = sum(wl) + if wls > 0.0 + n_eff = wls ^ 2 / sum(wl .^ 2) + end + return n_eff +end + diff --git a/algorithms/automaton_abc.jl b/algorithms/automaton_abc.jl new file mode 100644 index 0000000000000000000000000000000000000000..d88f09bd60ef24ecd78d0d7828bcecebec221000 --- /dev/null +++ b/algorithms/automaton_abc.jl @@ -0,0 +1,145 @@ + +import Distributed: nprocs, nworkers, @distributed +import StatsBase: mean, median, std, cov, ProbabilityWeights +import NearestNeighbors: KDTree, knn +import Distributions: Categorical + +using DistributedArrays +using LinearAlgebra +using DelimitedFiles +using Logging + +include(get_module_path() * "/algorithms/_utils_abc.jl") + +struct ResultAutomatonAbc + mat_p_end::Matrix{Float64} + mat_cov::Matrix{Float64} + nbr_sim::Int64 + exec_time::Float64 + vec_dist::Vector{Float64} + epsilon::Float64 + weights::Vector{Float64} + l_ess::Vector{Float64} +end + +function automaton_abc(pm::ParametricModel; nbr_particles::Int = 100, alpha::Float64 = 0.75, kernel_type = "mvnormal", + NT::Float64 = nbr_particles/2, duration_time::Float64 = Inf, + bound_sim::Int = typemax(Int), str_var_aut::String = "d", verbose::Int = 0) + @assert typeof(pm.m) <: SynchronizedModel + @assert 0 < nbr_particles + @assert 0.0 < alpha < 1.0 + @assert kernel_type in ["mvnormal", "knn_mvnormal"] + dir_res = create_results_dir() + file_cfg = open(dir_res * "cfg_abc_pmc.txt", "w") + write(file_cfg, "ParametricModel : $(pm) \n") + write(file_cfg, "Number of particles : $(nbr_particles) \n") + write(file_cfg, "alpha : $(alpha) \n") + write(file_cfg, "kernel type : $(kernel_type) \n") + close(file_cfg) + if nprocs() == 1 + @info "Begin multi-threaded ABC PMC" + return _automaton_abc(pm, nbr_particles, alpha, kernel_type, NT, duration_time, bound_sim, dir_res, str_var_aut) + end + @info "Begin distributed ABC PMC" + _distributed_automaton_abc(pm, nbr_particles, alpha, kernel_type, NT, duration_time, bound_sim, dir_res, str_var_aut) +end + +# To code: +# Pkg related: draw!, prior_density!, create_results_dir + +function _automaton_abc(pm::ParametricModel, nbr_particles::Int, alpha::Float64, + kernel_type::String, NT::Float64, duration_time::Float64, bound_sim::Int, + dir_res::String, str_var_aut::String) + + @info "ABC PMC with $(nworkers()) processus and $(Threads.nthreads()) threads" + begin_time = time() + nbr_p = pm.df + last_epsilon = 0.0 + + # Init. Iteration 1 + t = 1 + epsilon = Inf + old_mat_p = zeros(nbr_p, nbr_particles) + vec_dist = zeros(nbr_particles) + @info "Step 1 : Init" + + _init_abc_automaton!(old_mat_p, vec_dist, pm, str_var_aut) + wl_old = ones(nbr_particles) + prior_pdf!(wl_old, pm, old_mat_p) + normalize!(wl_old, 1) + + ess = effective_sample_size(wl_old) + l_ess = zeros(0) + l_ess = push!(l_ess, ess) + + flush(stdout) + nbr_tot_sim = nbr_particles + current_time = time() + old_epsilon = epsilon + mat_p = zeros(nbr_p, nbr_particles) + while (epsilon > last_epsilon) && (current_time - begin_time <= duration_time) && (nbr_tot_sim <= bound_sim) + t += 1 + begin_time_ite = time() + @info "Step $t" + # Set new epsilon + epsilon = _compute_epsilon(vec_dist, alpha, old_epsilon, last_epsilon) + @debug "5 first dist values" sort(vec_dist)[1:5] + @debug mean(vec_dist), maximum(vec_dist), median(vec_dist), std(vec_dist) + @info "Current epsilon" epsilon + @info "Total number of simulated trajectories" nbr_tot_sim + + # Broadcast simulations + l_nbr_sim = zeros(Int, nbr_particles) + wl_current = zeros(nbr_particles) + mat_cov = nothing + tree_mat_p = nothing + if kernel_type == "mvnormal" + mat_cov = 2 * cov(old_mat_p, ProbabilityWeights(wl_old), 2; corrected=false) + @debug diag(mat_cov) + if det(mat_cov) == 0.0 + @debug det(mat_cov), rank(mat_cov), effective_sample_size(wl_old) + @error "Bad inv mat cov" + end + end + if kernel_type == "knn_mvnormal" + tree_mat_p = KDTree(old_mat_p) + end + Threads.@threads for i = 1:nbr_particles + _update_param!(mat_p, vec_dist, l_nbr_sim, wl_current, i, pm, epsilon, + wl_old, old_mat_p, mat_cov, tree_mat_p, kernel_type, str_var_aut) + end + normalize!(wl_current, 1) + nbr_tot_sim += sum(l_nbr_sim) + old_mat_p = mat_p + ess = effective_sample_size(wl_current) + @debug ess + l_ess = push!(l_ess, ess) + + # Resampling + if ess < NT + @info "Resampling.." + d_weights = Categorical(wl_current) + ind_w = rand(d_weights, nbr_particles) + old_mat_p = old_mat_p[:,ind_w] + wl_current = ones(nbr_particles) + normalize!(wl_current, 1) + @info "End" + end + @info "Time spent for this step" steptime=(current_time-begin_time_ite) + @info "Number of simulations for this step" sum(l_nbr_sim) + wl_old = copy(wl_current) + flush(stdout) + current_time = time() + old_epsilon = epsilon + end + + mat_p = old_mat_p + mat_cov = cov(mat_p, ProbabilityWeights(wl_old), 2; corrected=false) + save_mat_p_end = false + if save_mat_p_end + writedlm(dir_res * "weights_end.csv", wl_old, ',') + writedlm(dir_res * "mat_p_end.csv", old_mat_p, ',') + end + r = ResultAutomatonAbc(mat_p, mat_cov, nbr_tot_sim, time() - begin_time, vec_dist, epsilon, wl_old, l_ess) + return r +end diff --git a/tests/automaton_abc/R1.jl b/tests/automaton_abc/R1.jl index bd9536cc3c4d9c38aa744443e051a89d57a42da5..6bf070715b10a002b1fca9e82b5bc494eabbd349 100644 --- a/tests/automaton_abc/R1.jl +++ b/tests/automaton_abc/R1.jl @@ -1,11 +1,15 @@ using MarkovProcesses +using Plots load_model("ER") load_automaton("automaton_F") -A_F = create_automaton_F(ER, 50.0, 75.0, 0.025, 0.05, "P") -sync_ER = A_F*ER +A_F_R1 = create_automaton_F(ER, 50.0, 75.0, 0.025, 0.05, "P") +sync_ER = A_F_R1*ER pm_sync_ER = ParametricModel(sync_ER, ("k3", Uniform(0.0, 100.0))) -automaton_abc(pm_sync_ER) +r = automaton_abc(pm_sync_ER; nbr_particles = 1000) + +histogram(r.mat_p_end', weights = r.weights, normalize = :density) +png("R1_hist.png") diff --git a/tests/automaton_abc/R2.jl b/tests/automaton_abc/R2.jl new file mode 100644 index 0000000000000000000000000000000000000000..b84d02bbaba9a5c5579be61b64e4ed27a557b7cd --- /dev/null +++ b/tests/automaton_abc/R2.jl @@ -0,0 +1,16 @@ + +using MarkovProcesses +using Plots + +load_model("ER") +load_automaton("automaton_F") +A_F_R2 = create_automaton_F(ER, 50.0, 75.0, 0.05, 0.075, "P") +sync_ER = A_F_R2*ER +pm_sync_ER = ParametricModel(sync_ER, ("k3", Uniform(0.0, 100.0))) + +r = automaton_abc(pm_sync_ER; nbr_particles = 1000) +@show r.nbr_sim + +histogram(r.mat_p_end', weights = r.weights, normalize = :density) +png("R2_hist.png") + diff --git a/tests/automaton_abc/R3.jl b/tests/automaton_abc/R3.jl new file mode 100644 index 0000000000000000000000000000000000000000..530ad6a92887faf34b3790add2ae5da4b002f78c --- /dev/null +++ b/tests/automaton_abc/R3.jl @@ -0,0 +1,15 @@ + +using MarkovProcesses +using Plots + +load_model("ER") +load_automaton("automaton_F") +A_F_R3 = create_automaton_F(ER, 25.0, 50.0, 0.05, 0.075, "P") +sync_ER = A_F_R3*ER +pm_sync_ER = ParametricModel(sync_ER, ("k3", Uniform(0.0, 100.0))) + +r = automaton_abc(pm_sync_ER; nbr_particles = 1000) + +histogram(r.mat_p_end', weights = r.weights, normalize = :density, xlims = (0.0, 100.0)) +png("R3_hist.png") +