diff --git a/core/MarkovProcesses.jl b/core/MarkovProcesses.jl index d05b0c8da1afafd2a13e5f4310900a10200a6b39..029af35579835f397f3a304c30bf4371c47ee1f2 100644 --- a/core/MarkovProcesses.jl +++ b/core/MarkovProcesses.jl @@ -9,8 +9,7 @@ export load_model, get_module_path include("model.jl") export Observations, AbstractTrajectory, Trajectory -export +,-,δ -export dist_lp, l_dist_lp +export +, -, δ, dist_lp export get_obs_var, length_states, length_obs_var, is_bounded include("trajectory.jl") diff --git a/core/model.jl b/core/model.jl index 79e42d377f13a4c764cc02817fc88edf737cab50..f351a5f41111face99f33f9f91c70683708be3b0 100644 --- a/core/model.jl +++ b/core/model.jl @@ -47,7 +47,7 @@ end function simulate(m::ContinuousTimeModel) # trajectory fields - full_values = zeros(1, m.d) + full_values = Matrix{Int}(undef, 1, m.d) full_values[1,:] = m.x0 times = Float64[m.t0] transitions = Union{String,Nothing}[nothing] @@ -75,18 +75,18 @@ function simulate(m::ContinuousTimeModel) n += i is_absorbing = m.is_absorbing(m.p,xn)::Bool end - values = @view full_values[:,m._g_idx] if is_bounded(m) if times[end] > m.time_bound - values[end,:] = values[end-1,:] + full_values[end,:] = full_values[end-1,:] times[end] = m.time_bound transitions[end] = nothing else - vcat(values, values[end,:]) + full_values = vcat(full_values, reshape(full_values[end,:], 1, m.d)) push!(times, m.time_bound) push!(transitions, nothing) end end + values = @view full_values[:,m._g_idx] return Trajectory(m, values, times, transitions) end diff --git a/core/trajectory.jl b/core/trajectory.jl index 865209980252fa867f31f042aa07b01c83ec4d2c..5fdb3aa252ea5cacb74bf0e247b093b01138be7f 100644 --- a/core/trajectory.jl +++ b/core/trajectory.jl @@ -15,7 +15,7 @@ function -(σ1::AbstractTrajectory,σ2::AbstractTrajectory) end # Top-level Lp distance function """ - `list_dist_lp(l_σ1, l_σ2; verbose, p, str_stat_list, str_stat_trajectory)` + `dist_lp(l_σ1, l_σ2; verbose, p, str_stat_list, str_stat_trajectory)` Function that computes Lp distance between two set of any dimensional trajectories. ... @@ -26,13 +26,14 @@ Function that computes Lp distance between two set of any dimensional trajectori It is salso called linkage function in clustering field. Only "mean" is available for now on. ... """ -function list_dist_lp(l_σ1::Vector{AbstractTrajectory}, l_σ2::Vector{AbstractTrajectory}; +function dist_lp(l_σ1::Vector{<:AbstractTrajectory}, l_σ2::Vector{<:AbstractTrajectory}; verbose::Bool = false, p::Int = 1, str_stat_list::String = "mean", str_stat_trajectory::String = "mean") if str_stat_list == "mean" return dist_lp_mean(l_σ1, l_σ2; verbose = verbose, p = p, str_stat_trajectory = str_stat_trajectory) end error("Unrecognized statistic in dist_lp") end + """ `dist_lp(σ1, σ2; verbose, p, str_stat)` @@ -54,6 +55,7 @@ function dist_lp(σ1::AbstractTrajectory, σ2::AbstractTrajectory; end error("Unrecognized statistic in dist_lp") end + """ `dist_lp(σ1, σ2, var; verbose, p, str_stat)` @@ -72,11 +74,11 @@ function dist_lp(σ1::AbstractTrajectory, σ2::AbstractTrajectory, var::String; if !is_bounded(σ1) || !is_bounded(σ2) @warn "Lp distance computed on unbounded trajectories. Result should be wrong" end - return dist_lp(σ1[var], σ1["times"], σ2[var], σ2["times"]) + return dist_lp(σ1[var], σ1["times"], σ2[var], σ2["times"]; verbose = false, p = p) end # Distance function. Vectorized version -function dist_lp(x_obs::AbstractVector{Int}, t_x::Vector{Float64}, y_obs::AbstractVector{Int}, t_y::Vector{Float64}; +function dist_lp(x_obs::AbstractVector{Int}, t_x::Vector{Float64}, y_obs::AbstractVector{Int}, t_y::Vector{Float64}; verbose::Bool = false, p::Int = 1) current_y_obs = y_obs[1] current_t_y = t_y[2] @@ -91,11 +93,11 @@ function dist_lp(x_obs::AbstractVector{Int}, t_x::Vector{Float64}, y_obs::Abstr end last_t_y = t_x[i] while current_t_y < t_x[i+1] - rect = abs(current_y_obs - x_obs[i]) * (current_t_y - last_t_y) + rect = abs(current_y_obs - x_obs[i])^p * (current_t_y - last_t_y) res += rect if verbose println("-- in loop :") - println("-- add : $rect abs($current_y_obs - $(x_obs[i])) * ($current_t_y - $(last_t_y)) / $(abs(current_y_obs - x_obs[i])) * $(current_t_y - last_t_y)") + println("-- add : $rect abs($current_y_obs - $(x_obs[i]))^p * ($current_t_y - $(last_t_y)) / $(abs(current_y_obs - x_obs[i])^p) * $(current_t_y - last_t_y)") print("-- ") @show current_y_obs, current_t_y end @@ -105,17 +107,17 @@ function dist_lp(x_obs::AbstractVector{Int}, t_x::Vector{Float64}, y_obs::Abstr current_t_y = t_y[idx+1] end last_t_read = max(last_t_y, t_x[i]) - rect = abs(current_y_obs - x_obs[i]) * (t_x[i+1] - last_t_read) + rect = abs(current_y_obs - x_obs[i])^p * (t_x[i+1] - last_t_read) res += rect if verbose - println("add : $rect abs($current_y_obs - $(x_obs[i])) * ($(t_x[i+1]) - $last_t_read) / $(abs(current_y_obs - x_obs[i])) * $(t_x[i+1] - last_t_read)") + println("add : $rect abs($current_y_obs - $(x_obs[i]))^p * ($(t_x[i+1]) - $last_t_read) / $(abs(current_y_obs - x_obs[i])^p) * $(t_x[i+1] - last_t_read)") @show t_x[i+1], t_y[idx+1] @show y_obs[idx], x_obs[i] @show last_t_read @show current_y_obs end end - return res + return res^(1/p) end # For all the observed variables function dist_lp_mean(σ1::AbstractTrajectory, σ2::AbstractTrajectory; @@ -123,12 +125,12 @@ function dist_lp_mean(σ1::AbstractTrajectory, σ2::AbstractTrajectory; if get_obs_var(σ1) != get_obs_var(σ2) error("Lp distances should be computed with the same observed variables") end res = 0.0 for var in get_obs_var(σ1) - res += dist_lp(σ1, σ2, var) + res += dist_lp(σ1, σ2, var; verbose = verbose, p = p) end return res / length_obs_var(σ1) end # For a list of trajectories -function list_dist_lp_mean(l_σ1::Vector{AbstractTrajectory}, l_σ2::Vector{AbstractTrajectory}; +function dist_lp_mean(l_σ1::Vector{<:AbstractTrajectory}, l_σ2::Vector{<:AbstractTrajectory}; verbose::Bool = false, p::Int = 1, str_stat_trajectory::String = "mean") res = 0.0 for σ1 in l_σ1 @@ -138,6 +140,36 @@ function list_dist_lp_mean(l_σ1::Vector{AbstractTrajectory}, l_σ2::Vector{Abst end return res / (length(l_σ1)*length(l_σ2)) end +# Get the value at any time +function _f_step(l_x::AbstractArray, l_t::AbstractArray, t::Float64) + @assert length(l_x) == length(l_t) + for i = 1:(length(l_t)-1) + if l_t[i] <= t < l_t[i+1] + return l_x[i] + end + end + if l_t[end] == t + return l_x[end] + end + return missing +end +# Riemann sum +function _riemann_sum(f::Function, t_begin::Real, t_end::Real, step::Float64) + res = 0.0 + l_t = collect(t_begin:step:t_end) + for i in 2:length(l_t) + res += f(l_t[i]) * (l_t[i] - l_t[i-1]) + end + return res +end + +function check_consistency(σ::AbstractTrajectory) + test_sizes = length(σ.times) == length(σ.transitions) == size(σ.values)[1] + test_obs_var = length_obs_var(σ) == length(σ.m.g) == size(σ.values)[2] + if !test_sizes error("Issue between sizes of values/times/transitions in this trajectory.") end + if !test_obs_var error("Issue between sizes of observed variables in model and values") end + return true +end # Properties of the trajectory length_states(σ::AbstractTrajectory) = length(σ.times) diff --git a/tests/dist_lp/dist_case_2.jl b/tests/dist_lp/dist_case_2.jl new file mode 100644 index 0000000000000000000000000000000000000000..786c0da9977970a30aeb4d87d17e9b6ae6a4caad --- /dev/null +++ b/tests/dist_lp/dist_case_2.jl @@ -0,0 +1,42 @@ + +using MarkovProcesses +import QuadGK: quadgk +load_model("SIR") + +test_all = true +for p = 1:2 + res = (4-1)^p * 0.5 + (4-2)^p * 0.1 + 0 + (5-3)^p * 0.1 + (5-1)^p * (1.4 - 0.8) + (3-1)^p * (3.0-1.4) + res = res^(1/p) + + y_obs = [1, 2, 5, 3, 3] + t_y = [0.0, 0.5, 0.7, 1.4, 3.0] + values = zeros(length(y_obs), 1) + values[:,1] = y_obs + l_tr = Vector{Nothing}(nothing, length(y_obs)) + σ2 = Trajectory(SIR, values, t_y, l_tr) + + x_obs = [4, 2, 3, 1, 1] + t_x = [0.0, 0.6, 0.7, 0.8, 3.0] + values = zeros(length(x_obs), 1) + values[:,1] = x_obs + l_tr = Vector{Nothing}(nothing, length(x_obs)) + σ1 = Trajectory(SIR, values, t_x, l_tr) + + test_1 = isapprox(dist_lp(x_obs, t_x, y_obs, t_y; p=p), res; atol = 1E-10) + test_1 = test_1 && isapprox(dist_lp(σ1,σ2; p=p), res; atol = 1E-10) + test_1_bis = isapprox(dist_lp(y_obs, t_y, x_obs, t_x; p=p), res; atol = 1E-10) + test_1_bis = test_1_bis && isapprox(dist_lp(σ2,σ1; p=p), res; atol = 1E-10) + + f_x(t::Real) = MarkovProcesses._f_step(x_obs, t_x, t) + f_y(t::Real) = MarkovProcesses._f_step(y_obs, t_y, t) + diff_f(t) = abs(f_x(t) - f_y(t))^p + int, err = quadgk(diff_f, 0.0, 3.0) + res_int = int^(1/p) + + test_2 = isapprox(res, res_int; atol = err) + + global test_all = test_all && test_1 && test_1_bis && test_2 +end + +return test_all + diff --git a/tests/dist_lp/dist_case_3.jl b/tests/dist_lp/dist_case_3.jl new file mode 100644 index 0000000000000000000000000000000000000000..149a517d42728ac851478039c2c63061d3be9bd0 --- /dev/null +++ b/tests/dist_lp/dist_case_3.jl @@ -0,0 +1,44 @@ + +using MarkovProcesses +import QuadGK: quadgk +load_model("SIR") + +test_all = true +for p = 1:2 + x_obs =[5, 6, 5, 4, 3, 2, 1, 1] + t_x = [0.0, 3.10807, 4.29827, 4.40704, 5.67024, 7.1299, 11.2763, 20.0] + values = zeros(length(x_obs), 1) + values[:,1] = x_obs + l_tr = Vector{Nothing}(nothing, length(x_obs)) + σ1 = Trajectory(SIR, values, t_x, l_tr) + + y_obs =[5, 4, 5, 4, 3, 4, 3, 2, 3, 2, 1, 2, 3, 4, 3, 4, 4] + t_y = [0.0, 0.334082, 1.21012, 1.40991, 1.58866, 2.45879, 2.94545, 4.66746, 5.44723, 5.88066, 7.25626, 11.4036, 13.8373, 17.1363, 17.8193, 18.7613, 20.0] + values = zeros(length(y_obs), 1) + values[:,1] = y_obs + l_tr = Vector{Nothing}(nothing, length(y_obs)) + σ2 = Trajectory(SIR, values, t_y, l_tr) + + f_x(t::Real) = MarkovProcesses._f_step(x_obs, t_x, t) + f_y(t::Real) = MarkovProcesses._f_step(y_obs, t_y, t) + diff_f(t) = abs(f_x(t) - f_y(t))^p + int, err = quadgk(diff_f, 0.0, 20.0, rtol=1e-10) + int_riemann = MarkovProcesses._riemann_sum(diff_f, 0.0, 20.0, 1E-5) + int_riemann = int_riemann^(1/p) + + res1 = dist_lp(x_obs, t_x, y_obs, t_y; p=p) + res2 = dist_lp(σ1,σ2; p=p) + res1_bis = dist_lp(y_obs, t_y, x_obs, t_x; p=p) + res2_bis = dist_lp(σ2,σ1; p=p) + test_1 = isapprox(res1, int_riemann; atol = 1E-3) + test_1 = test_1 && isapprox(res2, int_riemann; atol = 1E-3) + test_1_bis = isapprox(res1_bis, int_riemann; atol = 1E-3) + test_1_bis = test_1_bis && isapprox(res2_bis, int_riemann; atol = 1E-3) + + test_2 = res1 == res2 == res1_bis == res2_bis + + global test_all = test_all && test_1 && test_1_bis && test_2 +end + +return test_all + diff --git a/tests/dist_lp/dist_l1_case_1.jl b/tests/dist_lp/dist_l1_case_1.jl new file mode 100644 index 0000000000000000000000000000000000000000..08e9d7ebe7227f8f64d4b8b6baed62e0f3a0f6c3 --- /dev/null +++ b/tests/dist_lp/dist_l1_case_1.jl @@ -0,0 +1,40 @@ + +using MarkovProcesses +import QuadGK: quadgk +load_model("SIR") + +# Case 1 : 6.4 + +x_obs = [2, 4, 3, 3] +t_x = [0.0, 0.5, 1.2, 2.2] +t_y_bis = [0.0, 0.5, 1.2, 2.2] +values = zeros(length(x_obs), 1) +values[:,1] = x_obs +l_tr = Vector{Nothing}(nothing, length(x_obs)) +σ1 = Trajectory(SIR, values, t_x, l_tr) + +y_obs = [6, 6] +t_y = [0.0, 2.2] +t_x_bis = [0.0, 2.2] +values = zeros(length(y_obs), 1) +values[:,1] = y_obs +l_tr = Vector{Nothing}(nothing, length(y_obs)) +σ2 = Trajectory(SIR, values, t_y, l_tr) + +test_1 = dist_lp(x_obs, t_x, y_obs, t_y; p=1) == 6.4 +test_1 = test_1 && dist_lp(σ1, σ2; p=1) == 6.4 + +f_x(t::Real) = MarkovProcesses._f_step(x_obs, t_x, t) +f_y(t::Real) = MarkovProcesses._f_step(y_obs, t_y, t) +diff_f(t) = abs(f_x(t) - f_y(t)) +int, err = quadgk(diff_f, 0.0, 2.2) + +test_2 = isapprox(6.4, int; atol=err) + +# Case 1 bis : inverse of case 1 + +test_1_bis = dist_lp(y_obs, t_y, x_obs, t_x; p=1) == 6.4 +test_1_bis = test_1_bis && dist_lp(σ2, σ1; p=1) == 6.4 + +return test_1 && test_1_bis && test_2 + diff --git a/tests/dist_lp/dist_sim_sir.jl b/tests/dist_lp/dist_sim_sir.jl new file mode 100644 index 0000000000000000000000000000000000000000..5236040bbf550a07eb4a903e3c108534f82fd34e --- /dev/null +++ b/tests/dist_lp/dist_sim_sir.jl @@ -0,0 +1,33 @@ + +using MarkovProcesses +load_model("SIR") +time_bound = 100.0 +SIR.time_bound = time_bound + +test_all = true + +for p = 1:2 + nb_sim = 10 + for i = 1:nb_sim + σ1 = simulate(SIR) + σ2 = simulate(SIR) + d = dist_lp(σ1, σ2, "I"; p = p) + d2 = dist_lp(σ1, σ2; p = p) + + f_x(t::Float64) = MarkovProcesses._f_step(σ1["I"], σ1["times"], t) + f_y(t::Float64) = MarkovProcesses._f_step(σ2["I"], σ2["times"], t) + diff_f(t) = abs(f_x(t) - f_y(t))^p + int_riemann = MarkovProcesses._riemann_sum(diff_f, SIR.t0, SIR.time_bound, 1E-3) + res_int_riemann = int_riemann^(1/p) + + test = isapprox(d, res_int_riemann; atol = 1E-1) + test2 = isapprox(d2, res_int_riemann; atol = 1E-1) + #@show d, res_int_riemann + #@show test + + global test_all = test_all && test && test2 + end +end + +return test_all + diff --git a/tests/run_all.jl b/tests/run_all.jl index d8c11f700af560185a8f28890fe4a1e262b4b8d9..e1433d2a44cd2e7de93f15912c657758868ad269 100644 --- a/tests/run_all.jl +++ b/tests/run_all.jl @@ -1,4 +1,5 @@ include("run_unit.jl") include("run_simulation.jl") +include("run_dist_lp.jl") diff --git a/tests/run_dist_lp.jl b/tests/run_dist_lp.jl new file mode 100644 index 0000000000000000000000000000000000000000..6fc3507895eb02a3217f926b9f38be5bfd121d4c --- /dev/null +++ b/tests/run_dist_lp.jl @@ -0,0 +1,10 @@ + +using Test + +@testset "Distance Lp tests" begin + @test include("dist_lp/dist_l1_case_1.jl") + @test include("dist_lp/dist_case_2.jl") + @test include("dist_lp/dist_case_3.jl") + @test include("dist_lp/dist_sim_sir.jl") +end + diff --git a/tests/run_unit.jl b/tests/run_unit.jl index e21473893d96b429b080927ba192a6e38d121cbc..f334c5dd9fdfe5f780f52c3429ecdc5ca587a16d 100644 --- a/tests/run_unit.jl +++ b/tests/run_unit.jl @@ -15,5 +15,7 @@ using Test @test include("unit/length_obs_var.jl") @test include("unit/dist_lp_var.jl") @test include("unit/dist_lp.jl") + @test include("unit/l_dist_lp.jl") + @test include("unit/check_trajectory_consistency.jl") end diff --git a/tests/unit/check_trajectory_consistency.jl b/tests/unit/check_trajectory_consistency.jl new file mode 100644 index 0000000000000000000000000000000000000000..4ae5da9bd52213f41d913f47fe2b5f447ea92b80 --- /dev/null +++ b/tests/unit/check_trajectory_consistency.jl @@ -0,0 +1,28 @@ + +using MarkovProcesses +load_model("SIR") +SIR.time_bound = 100.0 +load_model("ER") +ER.time_bound = 10.0 + +test_all = true +nb_sim = 1000 + +for i = 1:nb_sim + σ = simulate(SIR) + σ2 = simulate(ER) + try + global test_all = test_all && MarkovProcesses.check_consistency(σ) && MarkovProcesses.check_consistency(σ2) + catch err + @show err + @show length(σ.values), length(σ.times), length(σ.transitions) + println() + @show σ.values + @show σ.times + @show σ.transitions + throw(err) + end +end + +return test_all + diff --git a/tests/unit/l_dist_lp.jl b/tests/unit/l_dist_lp.jl new file mode 100644 index 0000000000000000000000000000000000000000..ff439fa5616204ae61c578c73c1116a4afd5dc93 --- /dev/null +++ b/tests/unit/l_dist_lp.jl @@ -0,0 +1,11 @@ + +using MarkovProcesses + +load_model("SIR") +SIR.time_bound = 100.0 +σ1, σ2 = simulate(SIR), simulate(SIR) + +dist_lp([σ1], [σ2]) + +return true +