Bentriou Mahmoud / MarkovProcesses.jl / Commits / 888e74c1

Commit 888e74c1
Authored May 17, 2021 by Bentriou Mahmoud

    change abc dataset to matrix

Parent: 7d3b7089
Changes: 4 files
algorithms/abc_model_choice.jl

struct AbcModelChoiceDataset
    models_indexes::Vector{Int}
-   summary_stats_vector::Vector
+   summary_stats_matrix::Matrix
    epsilon::Float64
end
...
@@ -14,7 +14,7 @@ end
function getproperty(dataset::AbcModelChoiceDataset, sym::Symbol)
    if sym == :X
-       return dataset.summary_stats_vector
+       return dataset.summary_stats_matrix
    elseif sym == :y
        return dataset.models_indexes
    else
...
@@ -25,21 +25,21 @@ end
function abc_model_choice_dataset(models::Vector{<:Union{Model,ParametricModel}},
                                  summary_stats_observations,
                                  summary_stats_func::Function, distance_func::Function,
-                                 k::Int, N::Int)
+                                 k::Int, N::Int; dir_results::Union{Nothing,String} = nothing)
    nbr_models = length(models)
    models_prior = Categorical([1/nbr_models for i = 1:nbr_models])
-   return abc_model_choice_dataset(models, models_prior, summary_stats_observations, summary_stats_func, distance_func, k, N)
+   return abc_model_choice_dataset(models, models_prior, summary_stats_observations, summary_stats_func, distance_func, k, N; dir_results = dir_results)
end

function abc_model_choice_dataset(models::Vector{<:Union{Model,ParametricModel}},
                                  models_prior::DiscreteUnivariateDistribution,
                                  summary_stats_observations,
                                  summary_stats_func::Function, distance_func::Function,
-                                 k::Int, N::Int)
+                                 k::Int, N::Int; dir_results::Union{Nothing,String} = nothing)
    @assert length(models) >= 2 "Should contain at least 2 models"
    @assert ncategories(models_prior) == length(models) "Number of categories of models' prior and number of models do not equal"
    models_indexes = zeros(Int, N)
-   summary_stats_vector = Vector{typeof(summary_stats_observations)}(undef, N)
+   summary_stats_matrix = zeros(eltype(summary_stats_observations), length(summary_stats_observations), N)
    distances = zeros(N)
    bool_parametric = typeof(models) <: Vector{ParametricModel}
    for i = 1:N
...
@@ -52,12 +52,16 @@ function abc_model_choice_dataset(models::Vector{<:Union{Model,ParametricModel}},
        else
            sim = simulate(models[current_idx_model])
        end
-       summary_stats_vector[i] = summary_stats_func(sim)
-       distances[i] = distance_func(summary_stats_vector[i], summary_stats_observations)
+       ss_i = summary_stats_func(sim)
+       summary_stats_matrix[:,i] = ss_i
+       distances[i] = distance_func(ss_i, summary_stats_observations)
    end
    k_nn = sortperm(distances, alg=QuickSort)[1:k]
-   return AbcModelChoiceDataset(models_indexes[k_nn], summary_stats_vector[k_nn], distances[k_nn[end]])
+   if dir_results != nothing
+       dir_results = basename(dir_results) != "" ? dir_results * "/" : dir_results
+   end
+   return AbcModelChoiceDataset(models_indexes[k_nn], summary_stats_matrix[:,k_nn], distances[k_nn[end]])
end

function rf_abc_model_choice(models::Vector{<:Union{Model,ParametricModel}},
...
@@ -69,7 +73,7 @@ function rf_abc_model_choice(models::Vector{<:Union{Model,ParametricModel}},
    @assert k <= N_ref
    trainset = abc_model_choice_dataset(models, summary_stats_observations, summary_stats_func, distance_func, k, N_ref)
    gridsearch = GridSearchCV(RandomForestClassifier(oob_score=true), hyperparameters_range)
-   fit!(gridsearch, trainset.X, trainset.y)
+   fit!(gridsearch, transpose(trainset.X), trainset.y)
    best_rf = gridsearch.best_estimator_
    return RandomForestABC(trainset, best_rf, summary_stats_observations, predict(best_rf, [summary_stats_observations]))
end
...
@@ -84,7 +88,7 @@ function posterior_proba_model(rf_abc::RandomForestABC)
        dict_params[Symbol(param)] = get_params(rf_abc.clf)[param]
    end
    rf_regressor = RandomForestRegressor(;dict_params...)
-   fit!(rf_regressor, rf_abc.reference_table.X, y_oob_regression)
+   fit!(rf_regressor, transpose(rf_abc.reference_table.X), y_oob_regression)
    return 1 - predict(rf_regressor, [rf_abc.summary_stats_observations])[1]
end
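
With this change, `dataset.X` (via `getproperty`) is a statistics × samples matrix rather than a vector of summary vectors, which is why the `fit!` calls above now transpose it before handing it to ScikitLearn. A minimal usage sketch, not part of the commit; `my_models`, `ss_obs`, `my_ss_func` and `my_dist` are hypothetical placeholders:

```julia
# Sketch only, not part of the commit; placeholder names are assumptions.
using MarkovProcesses
using ScikitLearn
@sk_import ensemble: RandomForestClassifier

trainset = abc_model_choice_dataset(my_models, ss_obs, my_ss_func, my_dist, 100, 1000)
# trainset.X is a length(ss_obs) × 100 matrix: statistics in rows, accepted samples in columns.
# ScikitLearn estimators expect samples × features, hence the transpose.
clf = fit!(RandomForestClassifier(), transpose(trainset.X), trainset.y)
```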
tests/abc_model_choice/toy_example.jl

...
@@ -66,7 +66,8 @@ savefig("set.svg")
grid = Dict(:n_estimators => [500], :min_samples_leaf => [1], :min_samples_split => [2], :n_jobs => [8])
@timev res_rf_abc = rf_abc_model_choice(models, ss_observations, ss_func, 29000; hyperparameters_range = grid)
-@show posterior_proba_model(res_rf_abc)
-println(classification_report(y_true = abc_testset.y, y_pred = predict(res_rf_abc.clf, abc_testset.X)))
-@show accuracy_score(abc_testset.y, predict(res_rf_abc.clf, abc_testset.X))
+@show posterior_proba_model(res_rf_abc)
+X_testset = transpose(abc_testset.X)
+println(classification_report(y_true = abc_testset.y, y_pred = predict(res_rf_abc.clf, X_testset)))
+@show accuracy_score(abc_testset.y, predict(res_rf_abc.clf, X_testset))
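
`accuracy_score` on the updated test call is simply the fraction of test labels the classifier recovers; a hedged plain-Julia equivalent, reusing the names defined in this script:

```julia
# Sketch only, assuming res_rf_abc, abc_testset and X_testset from the script above are in scope.
y_pred = predict(res_rf_abc.clf, X_testset)
manual_accuracy = sum(y_pred .== abc_testset.y) / length(abc_testset.y)
@show manual_accuracy   # should match accuracy_score(abc_testset.y, y_pred)
```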
tests/abc_model_choice/toy_example_ma.jl
0 → 100644

# From Pudlo: Reliable ABC model choice, 2016, Appendix B
using ARFIMA
using Random
using LinearAlgebra
using MarkovProcesses
using Distributions
using ScikitLearn
@sk_import metrics: (accuracy_score, classification_report)
using StatsBase: autocor

struct MA1 <: Model end
struct MA2 <: Model end

import MarkovProcesses: simulate

global N_tml = 100
global σ = 1.0

struct TriangleDist <: ContinuousMultivariateDistribution end
function Distributions.rand(d::TriangleDist)
    θ1 = rand(Uniform(-2, 2))
    θ2 = (θ1 < 0) ? rand(Uniform(-θ1-1, 1)) : rand(Uniform(θ1-1, 1))
    return [θ1, θ2]
end
Distributions.rand!(d::TriangleDist, p::AbstractVector) = p[:] = rand(d)
Distributions.length(d::TriangleDist) = 2
Distributions.pdf(d::TriangleDist, p::AbstractVector) = 1/8

function simulate(m::MA1)
    θ1 = rand(Uniform(-1, 1))
    x = zeros(100)
    ϵtm1 = rand(Normal(0, σ^2))
    x[1] = ϵtm1
    for t = 2:100
        ϵt = rand(Normal(0, σ^2))
        x[t] = ϵt - θ1*ϵtm1
        ϵtm1 = ϵt
    end
    return x
end
function simulate(m::MA2)
    θ1, θ2 = rand(TriangleDist())
    x = zeros(100)
    ϵtm1 = rand(Normal(0, σ^2))
    ϵtm2 = rand(Normal(0, σ^2))
    x[1] = ϵtm2
    x[2] = ϵtm1 - θ1*ϵtm2
    for t = 3:100
        ϵt = rand(Normal(0, σ^2))
        x[t] = ϵt - θ1*ϵtm1 - θ2*ϵtm2
        ϵtm2 = ϵtm1
        ϵtm1 = ϵt
    end
    return x
end

#=
function simulate(m::MA1)
    θ1 = rand(Uniform(-1, 1))
    return arma(N_tml, σ, nothing, SVector(θ1))
end
function simulate(m::MA2)
    θ = rand(TriangleDist())
    return arma(N_tml, σ, nothing, SVector(θ[1],θ[2]))
end
=#

m1, m2 = MA1(), MA2()
models = [m1, m2]

ss_func(y) = autocor(y, 1:7)
dist_l2(s_sim, s_obs) = norm(s_sim - s_obs)

observations = simulate(m1)
ss_observations = ss_func(observations)
abc_testset = abc_model_choice_dataset(models, ss_observations, ss_func, dist_l2, 10000, 10000)

grid = Dict(:n_estimators => [300], :min_samples_leaf => [1], :min_samples_split => [2], :n_jobs => [8])
res_rf_abc = rf_abc_model_choice(models, ss_observations, ss_func, 10000; hyperparameters_range = grid)
@show posterior_proba_model(res_rf_abc)

X_testset = transpose(abc_testset.X)
println(classification_report(y_true = abc_testset.y, y_pred = predict(res_rf_abc.clf, X_testset)))
@show accuracy_score(abc_testset.y, predict(res_rf_abc.clf, X_testset))
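
Here the summary statistic is the vector of the first seven autocorrelations, so each accepted simulation contributes one 7-element column to the dataset's summary-statistics matrix. A standalone sanity check of that shape, using a random series as a stand-in for a simulated MA path:

```julia
# Sketch only, not part of the commit.
using StatsBase: autocor

ss_func(y) = autocor(y, 1:7)
y = randn(100)                    # stand-in for one simulated MA(1)/MA(2) trajectory
@assert length(ss_func(y)) == 7   # one 7-element column per accepted simulation
```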
tests/abc_model_choice/toy_model.ipynb
0 → 100644

%% Cell type:markdown id: tags:

# Setup models, dataset

%% Cell type:code id: tags:

``` julia
using SpecialFunctions
using LinearAlgebra
using Random
using Distributions
using MarkovProcesses

global n = 20

struct Model1 <: Model end
struct Model2 <: Model end
struct Model3 <: Model end
import MarkovProcesses: simulate

function simulate(m::Model1)
    param = rand(Exponential(1))
    return rand(Exponential(param), n)
end
function simulate(m::Model2)
    param = rand(Normal())
    return rand(LogNormal(param, 1), n)
end
function simulate(m::Model3)
    param = rand(Exponential(1))
    return rand(Gamma(2, 1/param), n)
end
m1, m2, m3 = Model1(), Model2(), Model3()

lh_m1(s::Vector) = exp(log(gamma(n+1)) - (n+1)*log(1+s[1]))
lh_m2(s::Vector) = exp(-s[2]^2/(2n*(n+1)) - (s[3]^2)/2 + (s[2]^2)/(2n) - s[2]) * (2pi)^(-n/2) * (n+1)^(-1/2)
lh_m3(s::Vector) = exp(s[2]) * gamma(2n+1) / gamma(2)^n * (1+s[1])^(-2n-1)

ss_func(y) = [sum(y), sum(log.(y)), sum(log.(y) .^ 2)]
dist_l2(s_sim, s_obs) = sqrt(dot(s_sim, s_obs))

observations = simulate(m3)
ss_observations = ss_func(observations)
models = [m1, m2, m3]
abc_trainset = abc_model_choice_dataset(models, ss_observations, ss_func, dist_l2, 29000, 29000)
abc_testset = abc_model_choice_dataset(models, ss_observations, ss_func, dist_l2, 1000, 1000)

list_lh = [lh_m1, lh_m2, lh_m3]
prob_model(ss::Vector, list_lh, idx_model) = list_lh[idx_model](ss) / sum([list_lh[i](ss) for i = eachindex(list_lh)])
prob_model(ss::Vector, idx_model) = prob_model(ss, list_lh, idx_model)
prob_model3(ss::Vector) = prob_model(ss, list_lh, 3)
```
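
For reference, `prob_model` in the cell above evaluates the exact posterior model probabilities: the models are drawn with the uniform prior that `abc_model_choice_dataset` builds by default, so the prior weights cancel and only the closed-form likelihoods remain (with $L_j$ standing for `lh_m1`, `lh_m2`, `lh_m3`):

$$
P(m = j \mid s) = \frac{L_j(s)}{\sum_{i=1}^{3} L_i(s)}, \qquad j \in \{1, 2, 3\}.
$$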
%% Cell type:markdown id: tags:

# Plot

%% Cell type:code id: tags:

``` julia
using Plots

p = plot(title = "Trainset")
colors = ["black", "red", "green"]
begin_idx = 1
for i = 1:3
    models_i = findall(x -> x == i, abc_testset.models_indexes)
    nbr_obs = length(models_i)
    end_idx = begin_idx + nbr_obs - 1
    lh = list_lh[i]
    scatter!(p, begin_idx:end_idx,
             vec(mapslices(prob_model3, abc_testset.summary_stats_matrix[:,models_i], dims=1)),
             color = colors[i], markersize = 3.0, markershape = :cross, label = "Model $i")
    global begin_idx = end_idx + 1
end
p
```
%% Cell type:markdown id: tags:

# Classification models

%% Cell type:code id: tags:

``` julia
using ScikitLearn
@sk_import linear_model: LogisticRegression
@sk_import ensemble: RandomForestClassifier
@sk_import metrics: (classification_report, confusion_matrix)
@sk_import neighbors: KNeighborsClassifier

X_trainset = transpose(abc_trainset.X)
X_testset = transpose(abc_testset.X)

logit_reg = fit!(LogisticRegression(), X_trainset, abc_trainset.y)
y_pred_logit = predict(logit_reg, X_testset)
println(classification_report(y_pred = y_pred_logit, y_true = abc_testset.y))

rf_clf = fit!(RandomForestClassifier(n_estimators=500), X_trainset, abc_trainset.y)
y_pred_rf = predict(rf_clf, X_testset)
println(classification_report(y_pred = y_pred_rf, y_true = abc_testset.y))

knn_clf = fit!(KNeighborsClassifier(n_neighbors=20), X_trainset, abc_trainset.y)
y_pred_knn = predict(knn_clf, X_testset)
println(classification_report(y_pred = y_pred_knn, y_true = abc_testset.y))
```
%% Cell type:markdown id: tags:

# RF ABC

%% Cell type:code id: tags:

``` julia
res_rf = rf_abc_model_choice(models, ss_observations, ss_func, 29000;
                             hyperparameters_range = Dict(:n_estimators => [500]))
println(classification_report(y_pred = predict(res_rf.clf, X_testset), y_true = abc_testset.y))
println(confusion_matrix(y_pred = predict(res_rf.clf, X_testset), y_true = abc_testset.y))
```

%% Cell type:code id: tags:

``` julia
dict_params = Dict()
for param in keys(get_params(res_rf.clf))
    dict_params[Symbol(param)] = get_params(res_rf.clf)[param]
end
RandomForestClassifier(;dict_params...)
```

%% Cell type:code id: tags:

``` julia
oob_votes = res_rf.clf.oob_decision_function_
y_pred_oob = argmax.([oob_votes[i,:] for i = 1:size(oob_votes)[1]])
@show mean(y_pred_oob .== res_rf.reference_table.y)
@show res_rf.clf.oob_score_
```

%% Cell type:code id: tags:

``` julia
```
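
A hypothetical check of the OOB step in the last non-empty cell: each row of `oob_decision_function_` holds the out-of-bag class-vote proportions for one training sample, and `argmax` maps it back to a 1-based class index, which is what the OOB accuracy computed above relies on.

```julia
# Sketch only, with an assumed example row of oob_votes.
votes_row = [0.1, 0.7, 0.2]
@assert argmax(votes_row) == 2   # predicted class index for that sample
```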