Bentriou Mahmoud / MarkovProcesses.jl / Commits

Commit 97df1230, authored 3 years ago by Bentriou Mahmoud
implementation of abc model choice with random forest
parent 26228335

Showing 3 changed files with 179 additions and 2 deletions:
- algorithms/abc_model_choice.jl: +90 −0
- core/MarkovProcesses.jl: +17 −2
- tests/abc_model_choice/toy_example.jl: +72 −0
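The commit exports three new entry points: abc_model_choice_dataset, rf_abc_model_choice and posterior_proba_model. A minimal sketch of how they fit together, assuming user-supplied stand-ins for models, ss_observations and ss_func (as in the toy example below):

# Hedged usage sketch; see tests/abc_model_choice/toy_example.jl for a full run.
res = rf_abc_model_choice(models, ss_observations, ss_func, 10_000)
@show res.estim_model               # model index predicted for the observations
@show posterior_proba_model(res)    # estimated posterior of the selected model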
algorithms/abc_model_choice.jl — new file (mode 100644), +90 −0
struct AbcModelChoiceDataset
    models_indexes::Vector{Int}
    summary_stats_vector::Vector
    epsilon::Float64
end

struct RandomForestABC
    reference_table::AbcModelChoiceDataset
    clf
    summary_stats_observations
    estim_model
end

# Alias dataset.X / dataset.y to the summary statistics and the model indexes.
# Note: this must extend Base.getproperty; a plain local `getproperty` is not
# picked up by the dot syntax.
function Base.getproperty(dataset::AbcModelChoiceDataset, sym::Symbol)
    if sym == :X
        return dataset.summary_stats_vector
    elseif sym == :y
        return dataset.models_indexes
    else
        return getfield(dataset, sym)
    end
end

# Convenience method: uniform prior over the models.
function abc_model_choice_dataset(models::Vector{<:Union{Model,ParametricModel}},
                                  summary_stats_observations,
                                  summary_stats_func::Function, distance_func::Function,
                                  k::Int, N::Int)
    nbr_models = length(models)
    models_prior = Categorical([1/nbr_models for i = 1:nbr_models])
    return abc_model_choice_dataset(models, models_prior, summary_stats_observations,
                                    summary_stats_func, distance_func, k, N)
end

# Builds a reference table of N simulations and keeps the k simulations whose
# summary statistics are nearest to the observed ones; epsilon is the k-th
# smallest distance.
function abc_model_choice_dataset(models::Vector{<:Union{Model,ParametricModel}},
                                  models_prior::DiscreteUnivariateDistribution,
                                  summary_stats_observations,
                                  summary_stats_func::Function, distance_func::Function,
                                  k::Int, N::Int)
    @assert length(models) >= 2 "Should contain at least 2 models"
    @assert ncategories(models_prior) == length(models) "Number of categories of the models' prior and number of models are not equal"

    models_indexes = zeros(Int, N)
    summary_stats_vector = Vector{typeof(summary_stats_observations)}(undef, N)
    distances = zeros(N)
    bool_parametric = typeof(models) <: Vector{ParametricModel}
    for i = 1:N
        current_idx_model = rand(models_prior)
        models_indexes[i] = current_idx_model
        if bool_parametric
            param_model = models[current_idx_model]
            vec_p = rand(param_model.distribution)
            sim = simulate(param_model, vec_p)
        else
            sim = simulate(models[current_idx_model])
        end
        summary_stats_vector[i] = summary_stats_func(sim)
        distances[i] = distance_func(summary_stats_vector[i], summary_stats_observations)
    end
    k_nn = sortperm(distances, alg = QuickSort)[1:k]

    return AbcModelChoiceDataset(models_indexes[k_nn], summary_stats_vector[k_nn], distances[k_nn[end]])
end

# ABC model choice with random forests: fit a random forest classifier on the
# reference table, tuning hyperparameters by grid search.
function rf_abc_model_choice(models::Vector{<:Union{Model,ParametricModel}},
                             summary_stats_observations,
                             summary_stats_func::Function, N_ref::Int;
                             k::Int = N_ref, distance_func::Function = (x, y) -> 1,
                             hyperparameters_range::Dict = Dict(:n_estimators => [200],
                                                                :min_samples_leaf => [1, 2],
                                                                :min_samples_split => [2, 5]))
    @assert k <= N_ref
    trainset = abc_model_choice_dataset(models, summary_stats_observations,
                                        summary_stats_func, distance_func, k, N_ref)
    gridsearch = GridSearchCV(RandomForestClassifier(oob_score=true), hyperparameters_range)
    fit!(gridsearch, trainset.X, trainset.y)
    best_rf = gridsearch.best_estimator_
    return RandomForestABC(trainset, best_rf, summary_stats_observations,
                           predict(best_rf, [summary_stats_observations]))
end

# P(m = m^(ss_obs) | ss_obs) estimate: regress the out-of-bag misclassification
# indicator of the classifier on the summary statistics, then evaluate the
# fitted regressor at the observed statistics (Pudlo et al., 2016).
function posterior_proba_model(rf_abc::RandomForestABC)
    oob_votes = rf_abc.clf.oob_decision_function_
    y_pred_oob = argmax.([oob_votes[i, :] for i = 1:size(oob_votes)[1]])
    y_oob_regression = y_pred_oob .!= rf_abc.reference_table.y
    dict_params = Dict()
    for param in ["n_estimators", "min_samples_leaf", "min_samples_split", "oob_score"]
        dict_params[Symbol(param)] = get_params(rf_abc.clf)[param]
    end
    rf_regressor = RandomForestRegressor(; dict_params...)
    fit!(rf_regressor, rf_abc.reference_table.X, y_oob_regression)
    return 1 - predict(rf_regressor, [rf_abc.summary_stats_observations])[1]
end
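The Base.getproperty overload above makes dataset.X and dataset.y aliases of the stored fields, so a dataset can be passed straight to the classifier calls. A quick illustration (the field values are made up):

# Hypothetical values, only to show the aliasing.
dataset = AbcModelChoiceDataset([1, 2], [[0.5, 0.1, 0.2], [1.0, 0.3, 0.4]], 0.1)
@assert dataset.X === dataset.summary_stats_vector  # via Base.getproperty
@assert dataset.y === dataset.models_indexes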
core/MarkovProcesses.jl — +17 −2
@@ -8,12 +8,25 @@ import Base: setindex!, setproperty!, fill!, copyto!
 import Dates
 import Distributed: @everywhere, @distributed
 import Distributions: Product, Uniform, Normal
-import Distributions: Distribution, Univariate, Continuous, UnivariateDistribution
+import Distributions: Distribution, Univariate, Continuous, UnivariateDistribution,
+                      DiscreteUnivariateDistribution, MultivariateDistribution, product_distribution
-import Distributions: insupport, isbounded, pdf
+import Distributions: insupport, isbounded, ncategories, pdf
 import FunctionWrappers: FunctionWrapper
 import Random: rand, rand!
+import ScikitLearn
+import ScikitLearn: fit!, predict, get_params
+import ScikitLearn.GridSearch: GridSearchCV
 import StaticArrays: SVector, @SVector

+# Python objects import
+import PyCall: PyNULL
+const RandomForestClassifier = PyNULL()
+const RandomForestRegressor = PyNULL()
+function __init__()
+    (ScikitLearn.Skcore).import_sklearn()
+    copy!(RandomForestClassifier, (ScikitLearn.Skcore.pyimport("sklearn.ensemble")).RandomForestClassifier)
+    copy!(RandomForestRegressor, (ScikitLearn.Skcore.pyimport("sklearn.ensemble")).RandomForestRegressor)
+end
+
 ## Exports
 export Distribution, Product, Uniform, Normal

@@ -52,6 +65,7 @@ export load_model, load_automaton, load_plots
 # Algorithms
 export automaton_abc, abc_smc
+export abc_model_choice_dataset, rf_abc_model_choice, posterior_proba_model
 # About biochemical networks
 export @network_model

@@ -63,6 +77,7 @@ include("model.jl")
 include("utils.jl")
 include("network_model.jl")
 include("../algorithms/abc_smc.jl")
+include("../algorithms/abc_model_choice.jl")
 end
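Side note on the PyNULL()/__init__ idiom above: it is the precompilation-safe recipe from the PyCall documentation. Python objects cannot be cached when a Julia module is precompiled, so empty placeholders are created at the top level and filled in __init__, which runs each time the module is loaded. A self-contained sketch of the same pattern (the sklearn class here is chosen only for illustration):

import PyCall: PyNULL, pyimport

const GaussianNB = PyNULL()   # empty placeholder created at precompile time

function __init__()
    # Executed at module load time, once Python is available.
    copy!(GaussianNB, pyimport("sklearn.naive_bayes").GaussianNB)
end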
tests/abc_model_choice/toy_example.jl — new file (mode 100644), +72 −0
using SpecialFunctions
using LinearAlgebra
using Random
using Distributions
using MarkovProcesses
using ScikitLearn
@sk_import metrics: (accuracy_score, classification_report)

global n = 20

# Three competing models for n i.i.d. observations.
struct Model1 <: Model end
struct Model2 <: Model end
struct Model3 <: Model end
import MarkovProcesses: simulate
function simulate(m::Model1)
    param = rand(Exponential(1))
    return rand(Exponential(param), n)
end
function simulate(m::Model2)
    param = rand(Normal())
    return rand(LogNormal(param, 1), n)
end
function simulate(m::Model3)
    param = rand(Exponential(1))
    return rand(Gamma(2, 1/param), n)
end
m1, m2, m3 = Model1(), Model2(), Model3()

# Marginal likelihoods of the three models given the summary statistics
# s = [sum(y), sum(log.(y)), sum(log.(y).^2)].
lh_m1(s) = exp(log(gamma(n+1)) - (n+1)*log(1+s[1]))
lh_m2(s) = exp(-s[2]^2/(2n*(n+1)) - (s[3]^2)/2 + (s[2]^2)/(2n) - s[2]) * (2pi)^(-n/2) * (n+1)^(-1/2)
lh_m3(s) = exp(s[2]) * gamma(2n+1) / gamma(2)^n * (1+s[1])^(-2n-1)

ss_func(y) = [sum(y), sum(log.(y)), sum(log.(y).^2)]
# L2 distance between simulated and observed summary statistics.
dist_l2(s_sim, s_obs) = sqrt(dot(s_sim - s_obs, s_sim - s_obs))

observations = simulate(m3)
ss_observations = ss_func(observations)
models = [m1, m2, m3]
abc_testset = abc_model_choice_dataset(models, ss_observations, ss_func, dist_l2, 1000, 1000)

# Exact posterior probability of a model given the summary statistics.
list_lh = [lh_m1, lh_m2, lh_m3]
prob_model(ss, list_lh, idx_model) = list_lh[idx_model](ss) / sum([list_lh[i](ss) for i = eachindex(list_lh)])
prob_model(ss, idx_model) = prob_model(ss, list_lh, idx_model)

#=
using Plots
p = plot(title="Trainset")
colors = ["black", "red", "green"]
begin_idx = 1
for i = 1:3
models_i = findall(x->x==i, abc_trainset.models_indexes)
nbr_obs = length(models_i)
end_idx = begin_idx + nbr_obs - 1
lh = list_lh[i]
@show i
@show nbr_obs
@show begin_idx:end_idx
scatter!(p, begin_idx:end_idx, prob_model.(abc_trainset.summary_stats_vector[models_i], 3),
color = colors[i], markersize = 3.0, markershape = :cross, label = "Model $i")
global begin_idx = end_idx + 1
end
savefig("set.svg")
=#
grid = Dict(:n_estimators => [500], :min_samples_leaf => [1], :min_samples_split => [2])
res_rf_abc = rf_abc_model_choice(models, ss_observations, ss_func, 29000; hyperparameters_range = grid)
@show posterior_proba_model(res_rf_abc)
println(classification_report(y_true = abc_testset.y, y_pred = predict(res_rf_abc.clf, abc_testset.X)))
@show accuracy_score(abc_testset.y, predict(res_rf_abc.clf, abc_testset.X))
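Since the three toy models have tractable likelihoods, a possible extra check (not in the commit) is to compare the analytic posterior of the data-generating model with the forest's error-based estimate; the two are comparable whenever the classifier selects model 3:

# Hypothetical sanity check using the helpers defined above.
@show prob_model(ss_observations, 3)     # exact posterior of model 3
@show posterior_proba_model(res_rf_abc)  # RF estimate for the selected model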