Implementation of the dist function. Add of several tests.

Fix of a major issue in the simulation: sometimes the number of states wasn't consistent. Add of test to check the consistency of the Trajectory structure (unit/check_trajectory_consistency).

Implementation of the dist function. Add of several tests.
963ef13b · Bentriou Mahmoud · 50e2b9ae · 963ef13b · 963ef13b · 963ef13b
Commit 963ef13b authored 4 years ago by Bentriou Mahmoud
--- a/core/MarkovProcesses.jl
+++ b/core/MarkovProcesses.jl
@@ -9,8 +9,7 @@ export load_model, get_module_path
 include("model.jl")
 export Observations, AbstractTrajectory, Trajectory
-export +,-,δ
+export +, -, δ, dist_lp
-export dist_lp, l_dist_lp
 export get_obs_var, length_states, length_obs_var, is_bounded
 include("trajectory.jl")

--- a/core/model.jl
+++ b/core/model.jl
@@ -47,7 +47,7 @@ end
 function simulate(m::ContinuousTimeModel)
    # trajectory fields
-    full_values = zeros(1, m.d)
+    full_values = Matrix{Int}(undef, 1, m.d)
    full_values[1,:] = m.x0
    times = Float64[m.t0]
    transitions = Union{String,Nothing}[nothing]
@@ -75,18 +75,18 @@ function simulate(m::ContinuousTimeModel)
        n += i
        is_absorbing = m.is_absorbing(m.p,xn)::Bool
    end
-    values = @view full_values[:,m._g_idx] 
    if is_bounded(m)
        if times[end] > m.time_bound
-            values[end,:] = values[end-1,:]
+            full_values[end,:] = full_values[end-1,:]
            times[end] = m.time_bound
            transitions[end] = nothing
        else
-            vcat(values, values[end,:])
+            full_values = vcat(full_values, reshape(full_values[end,:], 1, m.d))
            push!(times, m.time_bound)
            push!(transitions, nothing)
        end
    end
+    values = @view full_values[:,m._g_idx] 
    return Trajectory(m, values, times, transitions)
 end

--- a/core/trajectory.jl
+++ b/core/trajectory.jl
@@ -15,7 +15,7 @@ function -(σ1::AbstractTrajectory,σ2::AbstractTrajectory) end
 # Top-level Lp distance function
 """
-    `list_dist_lp(l_σ1, l_σ2; verbose, p, str_stat_list, str_stat_trajectory)`   
+    `dist_lp(l_σ1, l_σ2; verbose, p, str_stat_list, str_stat_trajectory)`   
 Function that computes Lp distance between two set of any dimensional trajectories.
 ...
@@ -26,13 +26,14 @@ Function that computes Lp distance between two set of any dimensional trajectori
 It is salso called linkage function in clustering field. Only "mean" is available for now on.
 ...
 """
-function list_dist_lp(l_σ1::Vector{AbstractTrajectory}, l_σ2::Vector{AbstractTrajectory};  
+function dist_lp(l_σ1::Vector{<:AbstractTrajectory}, l_σ2::Vector{<:AbstractTrajectory};  
                 verbose::Bool = false, p::Int = 1, str_stat_list::String = "mean", str_stat_trajectory::String = "mean")
    if str_stat_list == "mean"
        return dist_lp_mean(l_σ1, l_σ2; verbose = verbose, p = p, str_stat_trajectory = str_stat_trajectory)
    end
    error("Unrecognized statistic in dist_lp")
 end
 """
    `dist_lp(σ1, σ2; verbose, p, str_stat)`   
@@ -54,6 +55,7 @@ function dist_lp(σ1::AbstractTrajectory, σ2::AbstractTrajectory;
    end
    error("Unrecognized statistic in dist_lp")
 end
 """
    `dist_lp(σ1, σ2, var; verbose, p, str_stat)`   
@@ -72,11 +74,11 @@ function dist_lp(σ1::AbstractTrajectory, σ2::AbstractTrajectory, var::String;
    if !is_bounded(σ1) || !is_bounded(σ2)
        @warn "Lp distance computed on unbounded trajectories. Result should be wrong"
    end
-    return dist_lp(σ1[var], σ1["times"], σ2[var], σ2["times"])
+    return dist_lp(σ1[var], σ1["times"], σ2[var], σ2["times"]; verbose = false, p = p)
 end
 # Distance function. Vectorized version
-function dist_lp(x_obs::AbstractVector{Int}, t_x::Vector{Float64},  y_obs::AbstractVector{Int}, t_y::Vector{Float64}; 
+function dist_lp(x_obs::AbstractVector{Int}, t_x::Vector{Float64}, y_obs::AbstractVector{Int}, t_y::Vector{Float64}; 
                 verbose::Bool = false, p::Int = 1)
    current_y_obs = y_obs[1]
    current_t_y = t_y[2]
@@ -91,11 +93,11 @@ function dist_lp(x_obs::AbstractVector{Int}, t_x::Vector{Float64},  y_obs::Abstr
        end
        last_t_y = t_x[i]
        while current_t_y < t_x[i+1]
-            rect =  abs(current_y_obs - x_obs[i]) * (current_t_y - last_t_y)
+            rect =  abs(current_y_obs - x_obs[i])^p * (current_t_y - last_t_y)
            res += rect
            if verbose
                println("-- in loop :")
-                println("-- add : $rect abs($current_y_obs - $(x_obs[i])) * ($current_t_y - $(last_t_y)) / $(abs(current_y_obs - x_obs[i])) * $(current_t_y - last_t_y)")
+                println("-- add : $rect abs($current_y_obs - $(x_obs[i]))^p * ($current_t_y - $(last_t_y)) / $(abs(current_y_obs - x_obs[i])^p) * $(current_t_y - last_t_y)")
                print("-- ")
                @show current_y_obs, current_t_y
            end
@@ -105,17 +107,17 @@ function dist_lp(x_obs::AbstractVector{Int}, t_x::Vector{Float64},  y_obs::Abstr
            current_t_y = t_y[idx+1]
        end
        last_t_read = max(last_t_y, t_x[i])
-        rect = abs(current_y_obs - x_obs[i]) * (t_x[i+1] - last_t_read)
+        rect = abs(current_y_obs - x_obs[i])^p * (t_x[i+1] - last_t_read)
        res += rect
        if verbose
-            println("add : $rect abs($current_y_obs - $(x_obs[i])) * ($(t_x[i+1]) - $last_t_read) / $(abs(current_y_obs - x_obs[i])) * $(t_x[i+1] - last_t_read)")
+            println("add : $rect abs($current_y_obs - $(x_obs[i]))^p * ($(t_x[i+1]) - $last_t_read) / $(abs(current_y_obs - x_obs[i])^p) * $(t_x[i+1] - last_t_read)")
            @show t_x[i+1], t_y[idx+1]
            @show y_obs[idx], x_obs[i]
            @show last_t_read
            @show current_y_obs
        end
    end
-    return res
+    return res^(1/p)
 end
 # For all the observed variables
 function dist_lp_mean(σ1::AbstractTrajectory, σ2::AbstractTrajectory;  
@@ -123,12 +125,12 @@ function dist_lp_mean(σ1::AbstractTrajectory, σ2::AbstractTrajectory;
    if get_obs_var(σ1) != get_obs_var(σ2) error("Lp distances should be computed with the same observed variables") end
    res = 0.0
    for var in get_obs_var(σ1)
-        res += dist_lp(σ1, σ2, var)
+        res += dist_lp(σ1, σ2, var; verbose = verbose, p = p)
    end
    return res / length_obs_var(σ1)
 end
 # For a list of trajectories
-function list_dist_lp_mean(l_σ1::Vector{AbstractTrajectory}, l_σ2::Vector{AbstractTrajectory};  
+function dist_lp_mean(l_σ1::Vector{<:AbstractTrajectory}, l_σ2::Vector{<:AbstractTrajectory};  
                 verbose::Bool = false, p::Int = 1, str_stat_trajectory::String = "mean")
    res = 0.0
    for σ1 in l_σ1
@@ -138,6 +140,36 @@ function list_dist_lp_mean(l_σ1::Vector{AbstractTrajectory}, l_σ2::Vector{Abst
    end
    return res / (length(l_σ1)*length(l_σ2))
 end
+# Get the value at any time
+function _f_step(l_x::AbstractArray, l_t::AbstractArray, t::Float64)
+    @assert length(l_x) == length(l_t)
+    for i = 1:(length(l_t)-1)
+        if l_t[i] <= t < l_t[i+1]
+            return l_x[i]
+        end
+    end
+    if l_t[end] == t
+        return l_x[end]
+    end
+    return missing
+end
+# Riemann sum
+function _riemann_sum(f::Function, t_begin::Real, t_end::Real, step::Float64)
+    res = 0.0
+    l_t = collect(t_begin:step:t_end)
+    for i in 2:length(l_t)
+        res += f(l_t[i]) * (l_t[i] - l_t[i-1])
+    end
+    return res
+end
+function check_consistency(σ::AbstractTrajectory)
+    test_sizes = length(σ.times) == length(σ.transitions) == size(σ.values)[1]
+    test_obs_var = length_obs_var(σ) == length(σ.m.g) == size(σ.values)[2]
+    if !test_sizes error("Issue between sizes of values/times/transitions in this trajectory.") end
+    if !test_obs_var error("Issue between sizes of observed variables in model and values") end
+    return true
+end
 # Properties of the trajectory
 length_states(σ::AbstractTrajectory) = length(σ.times)

--- a/tests/dist_lp/dist_case_2.jl
+++ b/tests/dist_lp/dist_case_2.jl
+using MarkovProcesses
+import QuadGK: quadgk
+load_model("SIR")
+test_all = true
+for p = 1:2
+    res = (4-1)^p * 0.5 + (4-2)^p * 0.1 + 0 + (5-3)^p * 0.1 + (5-1)^p * (1.4 - 0.8) + (3-1)^p * (3.0-1.4)
+    res = res^(1/p)
+    y_obs = [1, 2, 5, 3, 3]
+    t_y = [0.0, 0.5, 0.7, 1.4, 3.0]
+    values = zeros(length(y_obs), 1)
+    values[:,1] = y_obs
+    l_tr = Vector{Nothing}(nothing, length(y_obs))
+    σ2 = Trajectory(SIR, values, t_y, l_tr)
+    x_obs = [4, 2, 3, 1, 1]
+    t_x = [0.0, 0.6, 0.7, 0.8, 3.0]
+    values = zeros(length(x_obs), 1)
+    values[:,1] = x_obs
+    l_tr = Vector{Nothing}(nothing, length(x_obs))
+    σ1 = Trajectory(SIR, values, t_x, l_tr)
+    test_1 = isapprox(dist_lp(x_obs, t_x, y_obs, t_y; p=p), res; atol = 1E-10) 
+    test_1 = test_1 && isapprox(dist_lp(σ1,σ2; p=p), res; atol = 1E-10)
+    test_1_bis = isapprox(dist_lp(y_obs, t_y, x_obs, t_x; p=p), res; atol = 1E-10) 
+    test_1_bis = test_1_bis && isapprox(dist_lp(σ2,σ1; p=p), res; atol = 1E-10)
+    f_x(t::Real) = MarkovProcesses._f_step(x_obs, t_x, t)
+    f_y(t::Real) = MarkovProcesses._f_step(y_obs, t_y, t)
+    diff_f(t) = abs(f_x(t) - f_y(t))^p
+    int, err = quadgk(diff_f, 0.0, 3.0)
+    res_int = int^(1/p)
+    test_2 = isapprox(res, res_int; atol = err)
+    global test_all = test_all && test_1 && test_1_bis && test_2
+end
+return test_all
--- a/tests/dist_lp/dist_case_3.jl
+++ b/tests/dist_lp/dist_case_3.jl
+using MarkovProcesses
+import QuadGK: quadgk
+load_model("SIR")
+test_all = true
+for p = 1:2
+    x_obs =[5, 6, 5, 4, 3, 2, 1, 1]
+    t_x = [0.0, 3.10807, 4.29827, 4.40704, 5.67024, 7.1299, 11.2763, 20.0]
+    values = zeros(length(x_obs), 1)
+    values[:,1] = x_obs
+    l_tr = Vector{Nothing}(nothing, length(x_obs))
+    σ1 = Trajectory(SIR, values, t_x, l_tr)
+    y_obs =[5, 4, 5, 4, 3, 4, 3, 2, 3, 2, 1, 2, 3, 4, 3, 4, 4]
+    t_y = [0.0, 0.334082, 1.21012, 1.40991, 1.58866, 2.45879, 2.94545, 4.66746, 5.44723, 5.88066, 7.25626, 11.4036, 13.8373, 17.1363, 17.8193, 18.7613, 20.0]
+    values = zeros(length(y_obs), 1)
+    values[:,1] = y_obs
+    l_tr = Vector{Nothing}(nothing, length(y_obs))
+    σ2 = Trajectory(SIR, values, t_y, l_tr)
+    f_x(t::Real) = MarkovProcesses._f_step(x_obs, t_x, t)
+    f_y(t::Real) = MarkovProcesses._f_step(y_obs, t_y, t)
+    diff_f(t) = abs(f_x(t) - f_y(t))^p
+    int, err = quadgk(diff_f, 0.0, 20.0, rtol=1e-10)
+    int_riemann = MarkovProcesses._riemann_sum(diff_f, 0.0, 20.0, 1E-5)
+    int_riemann = int_riemann^(1/p)
+    res1 = dist_lp(x_obs, t_x, y_obs, t_y; p=p)
+    res2 = dist_lp(σ1,σ2; p=p)
+    res1_bis = dist_lp(y_obs, t_y, x_obs, t_x; p=p)
+    res2_bis = dist_lp(σ2,σ1; p=p)
+    test_1 = isapprox(res1, int_riemann; atol = 1E-3) 
+    test_1 = test_1 && isapprox(res2, int_riemann; atol = 1E-3)
+    test_1_bis = isapprox(res1_bis, int_riemann; atol = 1E-3) 
+    test_1_bis = test_1_bis && isapprox(res2_bis, int_riemann; atol = 1E-3)
+    test_2 = res1 == res2 == res1_bis == res2_bis
+    global test_all = test_all && test_1 && test_1_bis && test_2
+end
+return test_all
--- a/tests/dist_lp/dist_l1_case_1.jl
+++ b/tests/dist_lp/dist_l1_case_1.jl
+using MarkovProcesses
+import QuadGK: quadgk
+load_model("SIR")
+# Case 1 : 6.4
+x_obs = [2, 4, 3, 3]
+t_x = [0.0, 0.5, 1.2, 2.2]
+t_y_bis = [0.0, 0.5, 1.2, 2.2]
+values = zeros(length(x_obs), 1)
+values[:,1] = x_obs
+l_tr = Vector{Nothing}(nothing, length(x_obs))
+σ1 = Trajectory(SIR, values, t_x, l_tr)
+y_obs = [6, 6]
+t_y = [0.0, 2.2]
+t_x_bis = [0.0, 2.2]
+values = zeros(length(y_obs), 1)
+values[:,1] = y_obs
+l_tr = Vector{Nothing}(nothing, length(y_obs))
+σ2 = Trajectory(SIR, values, t_y, l_tr)
+test_1 = dist_lp(x_obs, t_x, y_obs, t_y; p=1) == 6.4
+test_1 = test_1 && dist_lp(σ1, σ2; p=1) == 6.4
+f_x(t::Real) = MarkovProcesses._f_step(x_obs, t_x, t)
+f_y(t::Real) = MarkovProcesses._f_step(y_obs, t_y, t)
+diff_f(t) = abs(f_x(t) - f_y(t))
+int, err = quadgk(diff_f, 0.0, 2.2)
+test_2 = isapprox(6.4, int; atol=err)
+# Case 1 bis : inverse of case 1 
+test_1_bis = dist_lp(y_obs, t_y, x_obs, t_x; p=1) == 6.4 
+test_1_bis = test_1_bis && dist_lp(σ2, σ1; p=1) == 6.4
+return test_1 && test_1_bis && test_2
--- a/tests/dist_lp/dist_sim_sir.jl
+++ b/tests/dist_lp/dist_sim_sir.jl
+using MarkovProcesses
+load_model("SIR")
+time_bound = 100.0
+SIR.time_bound = time_bound
+test_all = true
+for p = 1:2
+    nb_sim = 10
+    for i = 1:nb_sim
+        σ1 = simulate(SIR)
+        σ2 = simulate(SIR)
+        d = dist_lp(σ1, σ2, "I"; p = p)
+        d2 = dist_lp(σ1, σ2; p = p)
+        f_x(t::Float64) = MarkovProcesses._f_step(σ1["I"], σ1["times"], t)
+        f_y(t::Float64) = MarkovProcesses._f_step(σ2["I"], σ2["times"], t)
+        diff_f(t) = abs(f_x(t) - f_y(t))^p
+        int_riemann = MarkovProcesses._riemann_sum(diff_f, SIR.t0, SIR.time_bound, 1E-3)
+        res_int_riemann = int_riemann^(1/p)
+        test = isapprox(d, res_int_riemann; atol = 1E-1)
+        test2 = isapprox(d2, res_int_riemann; atol = 1E-1)
+        #@show d, res_int_riemann
+        #@show test
+        global test_all = test_all && test && test2
+    end
+end
+return test_all
--- a/tests/run_all.jl
+++ b/tests/run_all.jl
 include("run_unit.jl")
 include("run_simulation.jl")
+include("run_dist_lp.jl")
--- a/tests/run_dist_lp.jl
+++ b/tests/run_dist_lp.jl
+using Test
+@testset "Distance Lp tests" begin
+    @test include("dist_lp/dist_l1_case_1.jl")
+    @test include("dist_lp/dist_case_2.jl")
+    @test include("dist_lp/dist_case_3.jl")
+    @test include("dist_lp/dist_sim_sir.jl")
+end
--- a/tests/run_unit.jl
+++ b/tests/run_unit.jl
@@ -15,5 +15,7 @@ using Test
    @test include("unit/length_obs_var.jl")
    @test include("unit/dist_lp_var.jl")
    @test include("unit/dist_lp.jl")
+    @test include("unit/l_dist_lp.jl")
+    @test include("unit/check_trajectory_consistency.jl")
 end
--- a/tests/unit/check_trajectory_consistency.jl
+++ b/tests/unit/check_trajectory_consistency.jl
+using MarkovProcesses
+load_model("SIR")
+SIR.time_bound = 100.0
+load_model("ER")
+ER.time_bound = 10.0
+test_all = true
+nb_sim = 1000
+for i = 1:nb_sim
+    σ = simulate(SIR)
+    σ2 = simulate(ER)
+    try
+        global test_all = test_all && MarkovProcesses.check_consistency(σ) && MarkovProcesses.check_consistency(σ2)
+    catch err
+        @show err
+        @show length(σ.values), length(σ.times), length(σ.transitions)
+        println()
+        @show σ.values
+        @show σ.times
+        @show σ.transitions 
+        throw(err)
+    end
+end
+return test_all
--- a/tests/unit/l_dist_lp.jl
+++ b/tests/unit/l_dist_lp.jl
+using MarkovProcesses
+load_model("SIR")
+SIR.time_bound = 100.0
+σ1, σ2 = simulate(SIR), simulate(SIR)
+dist_lp([σ1], [σ2])
+return true