Automaton-ABC is implemented and works wells for ER 1D experiments

R1,R2,R3.

Automaton-ABC is implemented and works wells for ER 1D experiments
67040dc5 · Bentriou Mahmoud · a911a325 · 67040dc5 · 67040dc5 · 67040dc5
Commit 67040dc5 authored 4 years ago by Bentriou Mahmoud
--- a/algorithms/_utils_abc.jl
+++ b/algorithms/_utils_abc.jl
+
+import StatsBase: cov, ProbabilityWeights
+import Statistics: quantile
+import NearestNeighbors: knn, KDTree
+import Distributions: MvNormal, Categorical
+import Random: rand!
+
+function _init_abc_automaton!(old_mat_p::Matrix{Float64}, vec_dist::Vector{Float64}, 
+                              pm::ParametricModel, str_var_aut::String)
+    vec_p = zeros(pm.df)
+    for i = eachindex(vec_dist)
+        draw!(vec_p, pm)
+        old_mat_p[:,i] = vec_p
+        σ = simulate(pm, vec_p)
+        vec_dist[i] = σ.S[str_var_aut]
+    end
+end
+
+function _compute_epsilon(vec_dist::Vector{Float64}, α::Float64, 
+                          old_eps::Float64, last_eps::Float64)
+    u_vec_dist = unique(vec_dist)
+    if length(u_vec_dist) == 2
+        epsilon = quantile(u_vec_dist, α)
+    else
+        epsilon = quantile(vec_dist, α)
+    end
+    if old_eps == epsilon
+        print("eps == old_eps,  we readjust")
+        s_dist = sort(u_vec_dist)
+        max_dist = 0.0
+        for d in s_dist
+            if (d < epsilon) && (max_dist < d)
+                max_dist = d
+            end
+        end
+        max_dist = (max_dist == 0) ? 0.9*epsilon : max_dist
+        epsilon = max_dist
+    end
+    epsilon = (epsilon < last_eps) ? last_eps : epsilon
+end
+
+function _draw_param_kernel!(vec_p_prime::Vector{Float64}, 
+                             vec_p_star::SubArray{Float64,1}, old_mat_p::Matrix{Float64}, wl_old::Vector{Float64},
+                             mat_cov::Matrix{Float64}, tree_mat_p::Union{KDTree,Nothing}, kernel_type::String)
+    if kernel_type == "mvnormal"
+        d_mvnormal = MvNormal(vec_p_star, mat_cov)
+        rand!(d_mvnormal, vec_p_prime)
+    elseif kernel_type == "knn_mvnormal"
+        k = Int(round(0.25 * size(old_mat_p)[2]))
+        idxs, dist = knn(tree_mat_p, vec_p_star, k, true)
+        knn_mat_cov = 2 * cov(old_mat_p[:,idxs], ProbabilityWeights(wl_old[idxs]), 2; corrected=false)
+        d_knn_mvnormal = Distributions.MvNormal(vec_p_star, knn_mat_cov)
+        rand!(d_knn_mvnormal, vec_p_prime)
+        return knn_mat_cov
+    else
+        error("Unknown specified kernel")
+    end
+end
+
+function _update_param!(mat_p::Matrix{Float64}, vec_dist::Vector{Float64}, 
+                       l_nbr_sim::Vector{Int}, wl_current::Vector{Float64}, idx::Int,
+                       pm::ParametricModel, epsilon::Float64,
+                       wl_old::Vector{Float64}, old_mat_p::Matrix{Float64},
+                       mat_cov::Matrix{Float64}, tree_mat_p::Union{Nothing,KDTree}, 
+                       kernel_type::String, str_var_aut::String)
+    d_weights = Categorical(wl_old)
+    dist_sim = Inf
+    nbr_sim = 0
+    vec_p_prime = zeros(pm.df)
+    while dist_sim > epsilon
+        ind_p_star = rand(d_weights)
+        vec_p_star = view(old_mat_p, :, ind_p_star)
+        knn_mat_cov = _draw_param_kernel!(vec_p_prime, vec_p_star, old_mat_p, wl_old, mat_cov, tree_mat_p, kernel_type)
+        if !insupport(pm, vec_p_prime)
+            continue
+        end
+        σ = simulate(pm, vec_p_prime)
+        dist_sim = σ.S[str_var_aut]
+        nbr_sim += 1
+    end
+    
+    if kernel_type == "mvnormal"
+        mat_cov_kernel = mat_cov 
+    elseif kernel_type == "knn_mvnormal"
+        mat_cov_kernel = knn_mat_cov
+    else
+        error("Unknown kernel")
+    end
+    # Update
+    mat_p[:,idx] = vec_p_prime
+    vec_dist[idx] = dist_sim
+    l_nbr_sim[idx] = nbr_sim
+    _update_weight!(wl_current, idx, pm, wl_old, old_mat_p, vec_p_prime, mat_cov_kernel, kernel_type)
+end
+
+function _update_weight!(wl_current::Vector{Float64}, idx::Int,
+                         pm::ParametricModel,
+                         wl_old::Vector{Float64}, old_mat_p::Matrix{Float64}, 
+                         vec_p_prime::Vector{Float64},
+                         mat_cov_kernel::Matrix{Float64}, 
+                         kernel_type::String) 
+    denom = 0.0
+    for j in 1:length(wl_old)
+        #denom += wl_old[j] * Distributions.pdf(d_normal, inv_sqrt_mat_cov * (vec_p_current - old_mat_p[:,j]))::Float64 
+        denom += wl_old[j] * pdf(MvNormal(old_mat_p[:,j], mat_cov_kernel), vec_p_prime)::Float64 
+    end
+    wl_current[idx] = prior_pdf(pm, vec_p_prime) / denom 
+end
+
+function effective_sample_size(wl::Vector{Float64})
+    n_eff = 0.0
+    wls = sum(wl)
+    if wls > 0.0
+        n_eff = wls ^ 2 / sum(wl .^ 2)
+    end
+    return n_eff
+end
+
--- a/algorithms/automaton_abc.jl
+++ b/algorithms/automaton_abc.jl
+
+import Distributed: nprocs, nworkers, @distributed
+import StatsBase: mean, median, std, cov, ProbabilityWeights
+import NearestNeighbors: KDTree, knn 
+import Distributions: Categorical
+
+using DistributedArrays
+using LinearAlgebra
+using DelimitedFiles
+using Logging
+
+include(get_module_path() * "/algorithms/_utils_abc.jl")
+
+struct ResultAutomatonAbc
+	mat_p_end::Matrix{Float64}
+	mat_cov::Matrix{Float64}
+    nbr_sim::Int64
+    exec_time::Float64
+    vec_dist::Vector{Float64}
+    epsilon::Float64
+    weights::Vector{Float64}
+    l_ess::Vector{Float64}
+end
+
+function automaton_abc(pm::ParametricModel; nbr_particles::Int = 100, alpha::Float64 = 0.75, kernel_type = "mvnormal", 
+                       NT::Float64 = nbr_particles/2, duration_time::Float64 = Inf, 
+                       bound_sim::Int = typemax(Int), str_var_aut::String = "d", verbose::Int = 0) 
+    @assert typeof(pm.m) <: SynchronizedModel
+    @assert 0 < nbr_particles
+    @assert 0.0 < alpha < 1.0
+    @assert kernel_type in ["mvnormal", "knn_mvnormal"]
+    dir_res = create_results_dir()
+    file_cfg = open(dir_res * "cfg_abc_pmc.txt", "w")
+	write(file_cfg, "ParametricModel : $(pm) \n")
+	write(file_cfg, "Number of particles : $(nbr_particles) \n")
+    write(file_cfg, "alpha : $(alpha) \n")
+	write(file_cfg, "kernel type : $(kernel_type) \n")
+    close(file_cfg)
+	if nprocs() == 1
+		@info "Begin multi-threaded ABC PMC"
+        return _automaton_abc(pm, nbr_particles, alpha, kernel_type, NT, duration_time, bound_sim, dir_res, str_var_aut)
+	end
+	@info "Begin distributed ABC PMC"
+    _distributed_automaton_abc(pm, nbr_particles, alpha, kernel_type, NT, duration_time, bound_sim, dir_res, str_var_aut)
+end
+
+# To code: 
+# Pkg related: draw!, prior_density!, create_results_dir
+
+function _automaton_abc(pm::ParametricModel, nbr_particles::Int, alpha::Float64, 
+                        kernel_type::String, NT::Float64, duration_time::Float64, bound_sim::Int, 
+                        dir_res::String, str_var_aut::String)
+
+    @info "ABC PMC with $(nworkers()) processus and $(Threads.nthreads()) threads"
+    begin_time = time()
+    nbr_p = pm.df
+    last_epsilon = 0.0
+
+    # Init. Iteration 1
+    t = 1
+    epsilon = Inf
+    old_mat_p = zeros(nbr_p, nbr_particles)
+    vec_dist = zeros(nbr_particles)
+    @info "Step 1 : Init"
+    
+    _init_abc_automaton!(old_mat_p, vec_dist, pm, str_var_aut)
+    wl_old = ones(nbr_particles)
+    prior_pdf!(wl_old, pm, old_mat_p)
+    normalize!(wl_old, 1)
+    
+    ess = effective_sample_size(wl_old)
+    l_ess = zeros(0)
+    l_ess = push!(l_ess, ess)
+    
+    flush(stdout)
+    nbr_tot_sim = nbr_particles 
+    current_time = time()
+    old_epsilon = epsilon 
+	mat_p = zeros(nbr_p, nbr_particles)
+    while (epsilon > last_epsilon) && (current_time - begin_time <= duration_time) && (nbr_tot_sim <= bound_sim)
+        t += 1
+        begin_time_ite = time()
+        @info "Step $t"
+        # Set new epsilon
+        epsilon = _compute_epsilon(vec_dist, alpha, old_epsilon, last_epsilon)
+        @debug "5 first dist values" sort(vec_dist)[1:5]
+        @debug mean(vec_dist), maximum(vec_dist), median(vec_dist), std(vec_dist)
+        @info "Current epsilon" epsilon
+        @info "Total number of simulated trajectories" nbr_tot_sim
+        
+        # Broadcast simulations
+        l_nbr_sim = zeros(Int, nbr_particles) 
+        wl_current = zeros(nbr_particles)
+        mat_cov = nothing
+        tree_mat_p = nothing
+        if kernel_type == "mvnormal"
+            mat_cov = 2 * cov(old_mat_p, ProbabilityWeights(wl_old), 2; corrected=false)
+            @debug diag(mat_cov)
+            if det(mat_cov) == 0.0
+                @debug det(mat_cov), rank(mat_cov), effective_sample_size(wl_old)
+                @error "Bad inv mat cov"
+            end
+        end
+        if kernel_type == "knn_mvnormal"
+            tree_mat_p = KDTree(old_mat_p)
+        end
+        Threads.@threads for i = 1:nbr_particles
+            _update_param!(mat_p, vec_dist, l_nbr_sim, wl_current, i, pm, epsilon,
+                           wl_old, old_mat_p, mat_cov, tree_mat_p, kernel_type, str_var_aut)
+        end
+        normalize!(wl_current, 1)
+        nbr_tot_sim += sum(l_nbr_sim)
+        old_mat_p = mat_p
+        ess = effective_sample_size(wl_current)
+        @debug ess
+        l_ess = push!(l_ess, ess)
+        
+        # Resampling
+        if ess < NT
+            @info "Resampling.."
+            d_weights = Categorical(wl_current)
+            ind_w = rand(d_weights, nbr_particles)
+            old_mat_p = old_mat_p[:,ind_w]
+            wl_current = ones(nbr_particles)
+            normalize!(wl_current, 1)
+            @info "End"
+        end
+        @info "Time spent for this step" steptime=(current_time-begin_time_ite)
+        @info "Number of simulations for this step" sum(l_nbr_sim)
+        wl_old = copy(wl_current)
+        flush(stdout)
+        current_time = time()
+        old_epsilon = epsilon
+	end
+
+	mat_p = old_mat_p
+    mat_cov = cov(mat_p, ProbabilityWeights(wl_old), 2; corrected=false)
+    save_mat_p_end = false
+    if save_mat_p_end
+        writedlm(dir_res * "weights_end.csv", wl_old, ',')
+        writedlm(dir_res * "mat_p_end.csv", old_mat_p, ',')
+    end
+    r = ResultAutomatonAbc(mat_p, mat_cov, nbr_tot_sim, time() - begin_time, vec_dist, epsilon, wl_old, l_ess)
+    return r
+end
--- a/tests/automaton_abc/R1.jl
+++ b/tests/automaton_abc/R1.jl

 using MarkovProcesses
+using Plots

 load_model("ER")
 load_automaton("automaton_F")
-A_F = create_automaton_F(ER, 50.0, 75.0, 0.025, 0.05, "P")
-sync_ER = A_F*ER 
+A_F_R1 = create_automaton_F(ER, 50.0, 75.0, 0.025, 0.05, "P")
+sync_ER = A_F_R1*ER 
 pm_sync_ER = ParametricModel(sync_ER, ("k3", Uniform(0.0, 100.0)))

-automaton_abc(pm_sync_ER)
+r = automaton_abc(pm_sync_ER; nbr_particles = 1000)
+
+histogram(r.mat_p_end', weights = r.weights, normalize = :density)
+png("R1_hist.png")

--- a/tests/automaton_abc/R2.jl
+++ b/tests/automaton_abc/R2.jl
+
+using MarkovProcesses
+using Plots
+
+load_model("ER")
+load_automaton("automaton_F")
+A_F_R2 = create_automaton_F(ER, 50.0, 75.0, 0.05, 0.075, "P")
+sync_ER = A_F_R2*ER 
+pm_sync_ER = ParametricModel(sync_ER, ("k3", Uniform(0.0, 100.0)))
+
+r = automaton_abc(pm_sync_ER; nbr_particles = 1000)
+@show r.nbr_sim
+
+histogram(r.mat_p_end', weights = r.weights, normalize = :density)
+png("R2_hist.png")
+
--- a/tests/automaton_abc/R3.jl
+++ b/tests/automaton_abc/R3.jl
+
+using MarkovProcesses
+using Plots
+
+load_model("ER")
+load_automaton("automaton_F")
+A_F_R3 = create_automaton_F(ER, 25.0, 50.0, 0.05, 0.075, "P")
+sync_ER = A_F_R3*ER 
+pm_sync_ER = ParametricModel(sync_ER, ("k3", Uniform(0.0, 100.0)))
+
+r = automaton_abc(pm_sync_ER; nbr_particles = 1000)
+
+histogram(r.mat_p_end', weights = r.weights, normalize = :density, xlims = (0.0, 100.0))
+png("R3_hist.png")
+