From 50e2b9ae5ba3c2a0e248b890dfd0142df14b11dd Mon Sep 17 00:00:00 2001
From: Mahmoud Bentriou <mahmoud.bentriou@centralesupelec.fr>
Date: Tue, 17 Nov 2020 22:00:18 +0100
Subject: [PATCH] Change of 2 function names in trajectory.

Implementation of the lp distance for trajectories.
Add of unit tests for lp distance.
---
 bench/array_memory_order/multiple_sim.jl      |   3 +-
 .../read_random_state_trajectory.jl           |   4 +-
 bench/array_memory_order/read_trajectory.jl   |   4 +-
 bench/pygmalion/multiple_long_sim_er.jl       |  38 +++++
 .../pygmalion/read_random_state_trajectory.jl |   2 +-
 bench/pygmalion/read_trajectory_er.jl         |   2 +-
 core/MarkovProcesses.jl                       |   5 +-
 core/model.jl                                 |   4 +
 core/trajectory.jl                            | 135 +++++++++++++++++-
 tests/run_unit.jl                             |   4 +
 tests/unit/dist_lp.jl                         |  11 ++
 tests/unit/dist_lp_var.jl                     |  11 ++
 tests/unit/is_always_bounded_sir.jl           |  13 ++
 tests/unit/length_obs_var.jl                  |  23 +++
 14 files changed, 246 insertions(+), 13 deletions(-)
 create mode 100644 bench/pygmalion/multiple_long_sim_er.jl
 create mode 100644 tests/unit/dist_lp.jl
 create mode 100644 tests/unit/dist_lp_var.jl
 create mode 100644 tests/unit/is_always_bounded_sir.jl
 create mode 100644 tests/unit/length_obs_var.jl

diff --git a/bench/array_memory_order/multiple_sim.jl b/bench/array_memory_order/multiple_sim.jl
index 1fb5fc6..eadcbd2 100644
--- a/bench/array_memory_order/multiple_sim.jl
+++ b/bench/array_memory_order/multiple_sim.jl
@@ -25,7 +25,6 @@ if ARGS[1] == "SIR"
 elseif ARGS[1] == "ER"
     l_var = ["E","S","ES","P"]
     bound_time = 20.0
-    nbr_sim = 10000
 
     load_model("_bench_perf_test/ER_col")
     ER_col.time_bound = bound_time
@@ -43,7 +42,7 @@ elseif ARGS[1] == "ER"
 else
     error("Unavailable model")
 end
-nbr_sim = 10000
+nbr_sim = 1000
 
 println("Col")
 b1_col = @benchmark for i = 1:$(nbr_sim) _simulate_col($model_col) end
diff --git a/bench/array_memory_order/read_random_state_trajectory.jl b/bench/array_memory_order/read_random_state_trajectory.jl
index c7b869c..0804ab8 100644
--- a/bench/array_memory_order/read_random_state_trajectory.jl
+++ b/bench/array_memory_order/read_random_state_trajectory.jl
@@ -39,7 +39,7 @@ println("Col buffer:")
 
 function random_trajectory_value_col(m::ContinuousTimeModel)
     σ = _simulate_col_buffer(m)
-    n_states = get_states_number(σ)
+    n_states = length_states(σ)
     nb_rand = 1000
     res = 0
     for i = 1:nb_rand
@@ -57,7 +57,7 @@ println("Row buffer:")
 
 function random_trajectory_value_row(m::ContinuousTimeModel)
     σ = _simulate_row_buffer(m)
-    n_states = get_states_number(σ)
+    n_states = length_states(σ)
     nb_rand = 1000
     res = 0
     for i = 1:nb_rand
diff --git a/bench/array_memory_order/read_trajectory.jl b/bench/array_memory_order/read_trajectory.jl
index 65d9e47..db51374 100644
--- a/bench/array_memory_order/read_trajectory.jl
+++ b/bench/array_memory_order/read_trajectory.jl
@@ -40,7 +40,7 @@ println("Col buffer:")
 function read_trajectory_col(m::Model)
     res = 0
     σ = _simulate_col_buffer(m)
-    n_states = get_states_number(σ)
+    n_states = length_states(σ)
     n_read = 100000
     for k = 1:n_read
         for i = 1:n_states
@@ -59,7 +59,7 @@ println("Row buffer:")
 function read_trajectory_row(m::Model)
     res = 0
     σ = _simulate_row_buffer(m)
-    n_states = get_states_number(σ)
+    n_states = length_states(σ)
     n_read = 100000
     for k = 1:n_read
         for i = 1:n_states
diff --git a/bench/pygmalion/multiple_long_sim_er.jl b/bench/pygmalion/multiple_long_sim_er.jl
new file mode 100644
index 0000000..3e27d6a
--- /dev/null
+++ b/bench/pygmalion/multiple_long_sim_er.jl
@@ -0,0 +1,38 @@
+
+using BenchmarkTools
+BenchmarkTools.DEFAULT_PARAMETERS.samples = 20000
+
+nb_sim = 1000
+
+println("Pygmalion:")
+
+using pygmalion
+str_m = "enzymatic_reaction"
+str_d = "abc_er"
+pygmalion.load_model(str_m)
+str_oml = "P,R,time"
+ll_om = split(str_oml, ",")
+x0 = State(95.0, 95.0, 0.0, 0.0, 0.0, 0.0)
+p_true = Parameters(1.0, 1.0, 1.0)
+u = Control(20.0)
+tml = 1:400
+g_all = create_observation_function([ObserverModel(str_oml, tml)]) 
+@timev pygmalion.simulate(f, g_all, x0, u, p_true; on = nothing, full_timeline = true)
+
+b1_pyg = @benchmark for i=1:$(nb_sim) pygmalion.simulate($f, $g_all, $x0, $u, $p_true; on = nothing, full_timeline = true) end
+b2_pyg = @benchmark for i=1:$(nb_sim) pygmalion.simulate(f, g_all, x0, u, p_true; on = nothing, full_timeline = true) end
+@show minimum(b1_pyg), mean(b1_pyg), maximum(b1_pyg)
+@show minimum(b2_pyg), mean(b2_pyg), maximum(b2_pyg)
+
+println("MarkovProcesses:")
+
+using MarkovProcesses
+MarkovProcesses.load_model("ER")
+ER.time_bound = 20.0
+@timev MarkovProcesses.simulate(ER)
+
+b1 = @benchmark for i=1:$(nb_sim) MarkovProcesses.simulate($ER) end
+b2 = @benchmark for i=1:$(nb_sim) MarkovProcesses.simulate(ER) end
+@show minimum(b1), mean(b1), maximum(b1)
+@show minimum(b2), mean(b2), maximum(b2)
+
diff --git a/bench/pygmalion/read_random_state_trajectory.jl b/bench/pygmalion/read_random_state_trajectory.jl
index 9877751..a0cf950 100644
--- a/bench/pygmalion/read_random_state_trajectory.jl
+++ b/bench/pygmalion/read_random_state_trajectory.jl
@@ -36,7 +36,7 @@ MarkovProcesses.load_model("ER")
 ER.time_bound = 10.0
 σ = MarkovProcesses.simulate(ER)
 function random_trajectory_value(σ::AbstractTrajectory)
-    n_states = get_states_number(σ)
+    n_states = length_states(σ)
     return σ["P"][rand(1:n_states)]
 end
 # Bench
diff --git a/bench/pygmalion/read_trajectory_er.jl b/bench/pygmalion/read_trajectory_er.jl
index a108da2..36ec8cc 100644
--- a/bench/pygmalion/read_trajectory_er.jl
+++ b/bench/pygmalion/read_trajectory_er.jl
@@ -48,7 +48,7 @@ MarkovProcesses.load_model("ER")
 ER.time_bound = 10.0
 σ = MarkovProcesses.simulate(ER)
 function read_trajectory(σ::AbstractTrajectory)
-    n_states = get_states_number(σ)
+    n_states = length_states(σ)
     res = 0
     for i = 1:n_states
         res += (σ["P"])[i]
diff --git a/core/MarkovProcesses.jl b/core/MarkovProcesses.jl
index cef5b90..d05b0c8 100644
--- a/core/MarkovProcesses.jl
+++ b/core/MarkovProcesses.jl
@@ -10,8 +10,9 @@ include("model.jl")
 
 export Observations, AbstractTrajectory, Trajectory
 export +,-,δ
-export get_obs_variables,get_states_number
-include("observations.jl")
+export dist_lp, l_dist_lp
+export get_obs_var, length_states, length_obs_var, is_bounded
+include("trajectory.jl")
 
 end
 
diff --git a/core/model.jl b/core/model.jl
index 6867206..79e42d3 100644
--- a/core/model.jl
+++ b/core/model.jl
@@ -81,6 +81,10 @@ function simulate(m::ContinuousTimeModel)
             values[end,:] = values[end-1,:]
             times[end] = m.time_bound
             transitions[end] = nothing
+        else
+            vcat(values, values[end,:])
+            push!(times, m.time_bound)
+            push!(transitions, nothing)
         end
     end
     return Trajectory(m, values, times, transitions)
diff --git a/core/trajectory.jl b/core/trajectory.jl
index c59d18b..8652099 100644
--- a/core/trajectory.jl
+++ b/core/trajectory.jl
@@ -9,13 +9,141 @@ struct Trajectory <: AbstractTrajectory
     transitions::Vector{Union{String,Nothing}}
 end
 
+# Operations
 function +(σ1::AbstractTrajectory,σ2::AbstractTrajectory) end
 function -(σ1::AbstractTrajectory,σ2::AbstractTrajectory) end
-function δ(σ1::AbstractTrajectory,t::Float64) end
+
+# Top-level Lp distance function
+"""
+    `list_dist_lp(l_σ1, l_σ2; verbose, p, str_stat_list, str_stat_trajectory)`   
+
+Function that computes Lp distance between two set of any dimensional trajectories.
+...
+# Arguments
+- `l_σ1::Vector{AbstractTrajectory}` is the first set of trajectories. l_σ2 is the second.
+- `verbose::Bool` If `true`, launch a verbose execution of the computation. 
+- `str_stat_list::String` allows to set the statistic to apply on the distances of the trajectories. 
+It is salso called linkage function in clustering field. Only "mean" is available for now on.
+...
+"""
+function list_dist_lp(l_σ1::Vector{AbstractTrajectory}, l_σ2::Vector{AbstractTrajectory};  
+                 verbose::Bool = false, p::Int = 1, str_stat_list::String = "mean", str_stat_trajectory::String = "mean")
+    if str_stat_list == "mean"
+        return dist_lp_mean(l_σ1, l_σ2; verbose = verbose, p = p, str_stat_trajectory = str_stat_trajectory)
+    end
+    error("Unrecognized statistic in dist_lp")
+end
+"""
+    `dist_lp(σ1, σ2; verbose, p, str_stat)`   
+
+Function that computes Lp distance between two trajectories of any dimension.
+It computes Lp distances for each observed variable (contained in `get_obs_var(σ)`). and then apply a statistic on these distances.
+Requires `get_obs_var(σ1) == get_obs_var(σ2)`, it is verified if they are simulated from the same model.
+...
+# Arguments
+- `σ1::AbstractTrajectory` is the first trajectory. σ2 is the second.
+- `verbose::Bool` If `true`, launch a verbose execution of the computation. 
+- `str_stat::String` allows to set the statistic to apply on the distances computed  of the trajectories. 
+Only "mean" is available for now on.
+...
+"""
+function dist_lp(σ1::AbstractTrajectory, σ2::AbstractTrajectory;  
+                 verbose::Bool = false, p::Int = 1, str_stat::String = "mean")
+    if str_stat == "mean"
+        return dist_lp_mean(σ1, σ2; verbose = verbose, p = p)
+    end
+    error("Unrecognized statistic in dist_lp")
+end
+"""
+    `dist_lp(σ1, σ2, var; verbose, p, str_stat)`   
+
+Function that computes Lp distance between two trajectories of any dimension.
+It computes Lp distances for each observed variable (contained in `get_obs_var(σ)`). and then apply a statistic on these distances.
+Requires `get_obs_var(σ1) == get_obs_var(σ2)`, it is verified if they are simulated from the same model.
+...
+# Arguments
+- `σ1::AbstractTrajectory` is the first trajectory. σ2 is the second.
+- `var::String` is an observed variable. Have to be contained in `get_obs_var(σ1)` and `get_obs_var(σ2)`.
+- `verbose::Bool` If `true`, launch a verbose execution of the computation. 
+...
+"""
+function dist_lp(σ1::AbstractTrajectory, σ2::AbstractTrajectory, var::String; 
+                 verbose::Bool = false, p::Int = 1)
+    if !is_bounded(σ1) || !is_bounded(σ2)
+        @warn "Lp distance computed on unbounded trajectories. Result should be wrong"
+    end
+    return dist_lp(σ1[var], σ1["times"], σ2[var], σ2["times"])
+end
+
+# Distance function. Vectorized version
+function dist_lp(x_obs::AbstractVector{Int}, t_x::Vector{Float64},  y_obs::AbstractVector{Int}, t_y::Vector{Float64}; 
+                 verbose::Bool = false, p::Int = 1)
+    current_y_obs = y_obs[1]
+    current_t_y = t_y[2]
+    idx = 1
+    res = 0.0
+    for i = 1:(length(x_obs)-1)
+        if verbose
+            @show i
+            @show (t_x[i], x_obs[i])
+            @show current_t_y
+            @show t_x[i+1]
+        end
+        last_t_y = t_x[i]
+        while current_t_y < t_x[i+1]
+            rect =  abs(current_y_obs - x_obs[i]) * (current_t_y - last_t_y)
+            res += rect
+            if verbose
+                println("-- in loop :")
+                println("-- add : $rect abs($current_y_obs - $(x_obs[i])) * ($current_t_y - $(last_t_y)) / $(abs(current_y_obs - x_obs[i])) * $(current_t_y - last_t_y)")
+                print("-- ")
+                @show current_y_obs, current_t_y
+            end
+            idx += 1
+            last_t_y = t_y[idx]
+            current_y_obs = y_obs[idx]
+            current_t_y = t_y[idx+1]
+        end
+        last_t_read = max(last_t_y, t_x[i])
+        rect = abs(current_y_obs - x_obs[i]) * (t_x[i+1] - last_t_read)
+        res += rect
+        if verbose
+            println("add : $rect abs($current_y_obs - $(x_obs[i])) * ($(t_x[i+1]) - $last_t_read) / $(abs(current_y_obs - x_obs[i])) * $(t_x[i+1] - last_t_read)")
+            @show t_x[i+1], t_y[idx+1]
+            @show y_obs[idx], x_obs[i]
+            @show last_t_read
+            @show current_y_obs
+        end
+    end
+    return res
+end
+# For all the observed variables
+function dist_lp_mean(σ1::AbstractTrajectory, σ2::AbstractTrajectory;  
+                      verbose::Bool = false, p::Int = 1)
+    if get_obs_var(σ1) != get_obs_var(σ2) error("Lp distances should be computed with the same observed variables") end
+    res = 0.0
+    for var in get_obs_var(σ1)
+        res += dist_lp(σ1, σ2, var)
+    end
+    return res / length_obs_var(σ1)
+end
+# For a list of trajectories
+function list_dist_lp_mean(l_σ1::Vector{AbstractTrajectory}, l_σ2::Vector{AbstractTrajectory};  
+                 verbose::Bool = false, p::Int = 1, str_stat_trajectory::String = "mean")
+    res = 0.0
+    for σ1 in l_σ1
+        for σ2 in l_σ2
+            res += dist_lp(σ1, σ2; verbose = verbose, p = p, str_stat = str_stat_trajectory)
+        end
+    end
+    return res / (length(l_σ1)*length(l_σ2))
+end
 
 # Properties of the trajectory
-get_states_number(σ::AbstractTrajectory) = length(σ.times)
-get_obs_variables(σ::AbstractTrajectory) = (σ.m).g
+length_states(σ::AbstractTrajectory) = length(σ.times)
+length_obs_var(σ::AbstractTrajectory) = size(σ.values)[2]
+get_obs_var(σ::AbstractTrajectory) = (σ.m).g
+is_bounded(σ::AbstractTrajectory) = σ.transitions[end] == nothing 
 
 # Access to trajectory values
 get_var_values(σ::AbstractTrajectory, var::String) = 
@@ -23,6 +151,7 @@ get_var_values(σ::AbstractTrajectory, var::String) =
 get_state(σ::AbstractTrajectory, idx::Int) = @view σ.values[idx,:]
 get_value(σ::AbstractTrajectory, var::String, idx::Int) = 
 σ.values[idx,(σ.m)._map_obs_var_idx[var]] 
+function δ(σ1::AbstractTrajectory,t::Float64) end
 
 # Get var values ["I"]
 function getindex(σ::AbstractTrajectory, var::String)
diff --git a/tests/run_unit.jl b/tests/run_unit.jl
index ffe7a93..e214738 100644
--- a/tests/run_unit.jl
+++ b/tests/run_unit.jl
@@ -11,5 +11,9 @@ using Test
     @test include("unit/change_obs_var_sir.jl")
     @test include("unit/change_obs_var_sir_2.jl")
     @test include("unit/getindex_access_trajectory.jl")
+    @test include("unit/is_always_bounded_sir.jl")
+    @test include("unit/length_obs_var.jl")
+    @test include("unit/dist_lp_var.jl")
+    @test include("unit/dist_lp.jl")
 end
 
diff --git a/tests/unit/dist_lp.jl b/tests/unit/dist_lp.jl
new file mode 100644
index 0000000..bddab64
--- /dev/null
+++ b/tests/unit/dist_lp.jl
@@ -0,0 +1,11 @@
+
+using MarkovProcesses
+
+load_model("SIR")
+SIR.time_bound = 100.0
+σ1, σ2 = simulate(SIR), simulate(SIR)
+
+dist_lp(σ1, σ2)
+
+return true
+
diff --git a/tests/unit/dist_lp_var.jl b/tests/unit/dist_lp_var.jl
new file mode 100644
index 0000000..0ddf14e
--- /dev/null
+++ b/tests/unit/dist_lp_var.jl
@@ -0,0 +1,11 @@
+
+using MarkovProcesses
+
+load_model("SIR")
+SIR.time_bound = 100.0
+σ1, σ2 = simulate(SIR), simulate(SIR)
+
+dist_lp(σ1, σ2, "I")
+
+return true
+
diff --git a/tests/unit/is_always_bounded_sir.jl b/tests/unit/is_always_bounded_sir.jl
new file mode 100644
index 0000000..99b4661
--- /dev/null
+++ b/tests/unit/is_always_bounded_sir.jl
@@ -0,0 +1,13 @@
+
+using MarkovProcesses
+
+test = true
+load_model("SIR")
+SIR.time_bound = 120.0
+for i = 1:1000
+    σ = simulate(SIR)
+    global test = test && is_bounded(σ)
+end
+
+return test
+
diff --git a/tests/unit/length_obs_var.jl b/tests/unit/length_obs_var.jl
new file mode 100644
index 0000000..1f32539
--- /dev/null
+++ b/tests/unit/length_obs_var.jl
@@ -0,0 +1,23 @@
+
+using MarkovProcesses
+
+load_model("SIR")
+σ = simulate(SIR)
+test_1 = length_obs_var(σ) == 1
+
+set_observed_var!(SIR, ["I", "R"])
+σ = simulate(SIR)
+test_2 = length_obs_var(σ) == 2
+
+load_model("ER")
+σ = simulate(ER)
+test_3 = length_obs_var(σ) == 1
+
+set_observed_var!(ER, ["P", "S", "ES"])
+σ = simulate(ER)
+test_4 = length_obs_var(σ) == 3
+
+test = test_1 && test_2 && test_3 && test_4
+
+return test
+
-- 
GitLab