ENV["LINES"] = 100
@time using Random: seed!
@time using LinearAlgebra
@time using StatsBase
@time using QuadGK
@time using StatsFuns
logmeanexp(x) = logsumexp(x) - log(length(x))
@time using StatsPlots
@time pyplot()
relax(t=0.2) = (backend() == Plots.PyPlotBackend() && PyPlot.clf(); sleep(t))
rd(x, d=3) = round(x; digits=d)
mrd(t, d=3) = map(x->rd(x, d), t)
@time using Turing
#turnprogress(false);
0.001326 seconds (1.35 k allocations: 72.297 KiB) 0.001262 seconds (1.35 k allocations: 73.062 KiB) 0.421434 seconds (492.48 k allocations: 27.323 MiB) 0.036551 seconds (52.66 k allocations: 2.846 MiB) 3.161826 seconds (6.80 M allocations: 319.277 MiB, 7.01% gc time) 18.629972 seconds (23.55 M allocations: 1.206 GiB, 2.71% gc time) 8.932715 seconds (17.96 M allocations: 863.120 MiB, 3.12% gc time) 42.442802 seconds (74.43 M allocations: 4.493 GiB, 3.52% gc time)
この節の内容についてはTuring公式ドキュメントの [https://turing.ml/dev/docs/using-turing/guide#access-values-inside-chain](Access Values inside Chain) を参照せよ.
Turingを使って,
chain = psample(model, NUTS(), 2000, 3)
のようにして作った chain
から名前が vars = [:a, :b, :c]
のパラメータの事後分布のサンプルを抜き出すには
val = chain[vars].value
として, AxisArray val
を作って,
W = vcat(val[:,:,m] for m in 1:size(val, 3))...)
と縦に連結すればよい. W
は Array{Real, 2}
型になる. さらに Real
を W
の成分の型に直すために
posterior_sample = Array{typeof(W[end,end]), 2}(W)
を実行する. これによって, 縦に長く, 横方向に :a, :b, :c
の順に並んだ配列ができる.
補足:
vars
はSymbolではなく, Stringの配列でもよい.w[1],...,w[10]
のすべてを取り出したければ vars = [:w]
でよい.w[1],...,w[5]
を取り出したければ vars = ["w[$i]" for i in 1:5]
とする."""
`get_posterior(chain::AbstractChains, vars::vars)` extracts the posterior sample of `vars` from `chain`.
Example:
```julia
chain = psample(model, NUTS(), 2000, 3)
posterior_sample = get_posterior_sample(chain, [:a, :b, :s])
```
"""
function get_posterior_sample(
chain::Turing.Inference.AbstractMCMC.AbstractChains,
vars::AbstractVector=get_vars(chain)
)
val = chain[vars].value
W = vcat((val[:,:,m] for m in 1:size(val, 3))...)
Array{typeof(W[end,end]), 2}(W)
end
"""
`get_vars(chain::AbstractChains)` returns the array of the names of the parameters in `chain`.
"""
function get_vars(chain::Turing.Inference.AbstractMCMC.AbstractChains)
vars = chain.name_map[:parameters]
end
"""
`get_vars(model::AbstractModel)` returns the array of the names of the parameters in `model`.
"""
function get_vars(model::Turing.Inference.AbstractMCMC.AbstractModel)
vi = DynamicPPL.VarInfo(linreg)
vars = collect(keys(DynamicPPL.tonamedtuple(vi)))
end
function get_posterior_sample(
chain::Turing.Inference.AbstractMCMC.AbstractChains,
model::Turing.Inference.AbstractMCMC.AbstractModel
)
get_posterior_sample(chain, get_vars(model))
end
display(@doc get_posterior_sample)
display(@doc get_vars)
get_posterior(chain::AbstractChains, vars::vars)
extracts the posterior sample of vars
from chain
.
Example:
chain = psample(model, NUTS(), 2000, 3)
posterior_sample = get_posterior_sample(chain, [:a, :b, :s])
get_vars(chain::AbstractChains)
returns the array of the names of the parameters in chain
.
get_vars(model::AbstractModel)
returns the array of the names of the parameters in model
.
データを $(X_k, Y_k)$ ($k=1,\ldots,n$) と書き, 確率モデルを $p(y|x,w)$ と書き, パラメーター $w$ に関する事後分布に関する平均と分散をそれぞれ $E_w[\;]$, $V_w[\;]$ と書くとき, WAICの定義は以下のように書ける:
$$ \begin{aligned} & T = -2\sum_{k=1}^n \log E_w[p(Y_k|X_k, w)], \\ & V = 2\sum_{k=1}^n V_w[\log p(Y_k|X_k, w)], \\ & \operatorname{WAIC} = T + V. \end{aligned} $$$p(Y_k|X_k,w)$ は $\exp(\log p(Y_k|X_k,w))$ と書けるので、$\log p(Y_k|X_k, w)$ からWAICは計算可能である. パラメーター $w$ に関する事後分布のサンプルを $w_l$ ($l=1,\ldots,L$) と書くとき, 上の $T$, $V$ は以下のように近似計算される:
$$ \begin{aligned} & T \approx -2\sum_{k=1}^n \log\,\operatorname{mean}\,\{\,\exp(\operatorname{lp}_{k,l})\mid l=1,\ldots,L\,\}, \\ & V \approx 2\sum_{k=1}^n \operatorname{var}\,\{\, \operatorname{lp}_{k,l}\mid l=1,\ldots,L\,\}. \end{aligned} $$ここで $\operatorname{mean}$ と $\operatorname{var}$ はそれぞれ標本平均と不偏分散を計算する函数であるとし,
$$ \operatorname{lp}_{k,l} = \log p(Y_k|X_k, w_l) $$とおいた. 以下のセルではこれをそのままJuliaのコードに翻訳する.
"""
`logpdf_array(logpdf_func, data, posterior_sample)` returns the array `lp` definef by
```julia
lp[k, l] = logpdf_func(data[k, :], posterior_sample[l, :])
```
"""
function logpdf_array(
logpdf_func,
data::AbstractArray,
posterior_sample::AbstractArray
)
n = size(data, 1)
L = size(posterior_sample, 1)
lp = Array{eltype(posterior_sample), 2}(undef, n, L)
for k in 1:n, l in 1:L
@views lp[k, l] = logpdf_func(data[k, :], posterior_sample[l, :])
end
lp
end
"""
`logpdf_array(logpdf_func, data, chain::AbstractChains, vars=get_vars(chain))` returns the array `lp` definef by
```julia
posterior_sample = get_posterior_sample(chain, vars)
lp[k, l] = logpdf_func(data[k, :], posterior_sample[l, :])
```
"""
function logpdf_array(
logpdf_func,
data::AbstractArray,
chain::Turing.Inference.AbstractMCMC.AbstractChains,
vars::AbstractVector=get_vars(chain)
)
posterior_sample = get_posterior_sample(chain, vars)
logpdf_array(logpdf_func, data, posterior_sample)
end
"""
`training_error(lp)` returns the training error calculated from the logpdf array `lp`.
"""
function training_error(lp::AbstractArray{R, 2}) where {R}
n, L = size(lp)
-2sum(logmeanexp(@view lp[k, :]) for k in 1:n)
end
"""
`functional_variance(lp)` returns the functional variance calculated from the logpdf array `lp`.
"""
function functional_variance(lp::AbstractArray{R, 2}) where {R}
n, L = size(lp)
2sum(var(@view lp[k,:]) for k in 1:n)
end
"""
`waic(lp)` returns the WAIC, the training error, and the functional variance calculated from the logpdf array `lp`.
"""
function waic(lp::AbstractArray{R, 2}) where {R}
T = training_error(lp)
V = functional_variance(lp)
WAIC = T + V
(WAIC=WAIC, T=T, V=V)
end
"""
`waic(logpdf_func, data, posterior_sample)` returns the WAIC, the training error, and the functional variance calculated from the arguments.
"""
function waic(
logpdf_func,
data::AbstractArray,
posterior_sample::AbstractArray)
lp = logpdf_array(logpdf_func, data, posterior_sample)
waic(lp)
end
"""
`waic(logpdf_func, data, chain, vars=get_vars(chain))` returns the WAIC, the training error, and the functional variance calculated from the arguments.
Example:
```julia
@model ols(X, Y) = begin
a ~ Flat()
b ~ Flat()
σ ~ FlatPos(0.0)
for k in eachindex(X)
Y[k] ~ Normal(a + b*X[k], σ)
end
end
n = 20
X = 3rand(n)
Y = X .+ 1 .+ 0.3randn(n)
chain_ols = psample(ols(X, Y), NUTS(), 2000, 3)
function logpdf_ols(x, w)
X, Y = x
a, b, σ = w
logpdf(Normal(a + b*X, σ), Y)
end
data_ols = hcat(X, Y)
vars_ols = [:a, :b, :σ]
@show waic(logpdf_ols, data_ols, chain_ols, vars_ols)
@show chain_ols
plot(chain_ols)
```
"""
function waic(
logpdf_func,
data::AbstractArray,
chain::Turing.Inference.AbstractMCMC.AbstractChains,
vars::AbstractVector=get_vars(chain)
)
posterior_sample = get_posterior_sample(chain, vars)
waic(logpdf_func, data, posterior_sample)
end
display(@doc logpdf_array)
display(@doc training_error)
display(@doc functional_variance)
display(@doc waic)
logpdf_array(logpdf_func, data, posterior_sample)
returns the array lp
definef by
lp[k, l] = logpdf_func(data[k, :], posterior_sample[l, :])
logpdf_array(logpdf_func, data, chain::AbstractChains, vars=get_vars(chain))
returns the array lp
definef by
posterior_sample = get_posterior_sample(chain, vars)
lp[k, l] = logpdf_func(data[k, :], posterior_sample[l, :])
training_error(lp)
returns the training error calculated from the logpdf array lp
.
functional_variance(lp)
returns the functional variance calculated from the logpdf array lp
.
waic(lp)
returns the WAIC, the training error, and the functional variance calculated from the logpdf array lp
.
waic(logpdf_func, data, posterior_sample)
returns the WAIC, the training error, and the functional variance calculated from the arguments.
waic(logpdf_func, data, chain, vars=get_vars(chain))
returns the WAIC, the training error, and the functional variance calculated from the arguments.
Example:
@model ols(X, Y) = begin
a ~ Flat()
b ~ Flat()
σ ~ FlatPos(0.0)
for k in eachindex(X)
Y[k] ~ Normal(a + b*X[k], σ)
end
end
n = 20
X = 3rand(n)
Y = X .+ 1 .+ 0.3randn(n)
chain_ols = psample(ols(X, Y), NUTS(), 2000, 3)
function logpdf_ols(x, w)
X, Y = x
a, b, σ = w
logpdf(Normal(a + b*X, σ), Y)
end
data_ols = hcat(X, Y)
vars_ols = [:a, :b, :σ]
@show waic(logpdf_ols, data_ols, chain_ols, vars_ols)
@show chain_ols
plot(chain_ols)
さらに事前分布を $\varphi(w)$ と書くと, データ全体から $k$ 番目の $(X_k, Y_k)$ を除いてできるデータから得られる事後分布は
$$ \varphi^*_k(w) = \frac {\varphi(w)\prod_{i\ne k} p(Y_i|X_i, w)} {\int \varphi(w)\prod_{i\ne k} p(Y_i|X_i, w)\,dw} $$と表わされる. これに関する平均を $E^k_w[\;]$ と書くことにする. このとき, 一個抜き出し交差検証(leave-one-out cross varidation, LOOCV)が次のように定義される:
$$ \operatorname{LOOCV} = -2\sum_{k=1}^n\log E^k_w[p(Y_k|X_k,w)] = -2\sum_{k=1}^n \int p(Y_k|X_k,w)\varphi^*_k(w)\,dw. $$以上のLOOCVの定義を以下のように変形できる:
$$ \begin{aligned} \operatorname{LOOCV} &= -2\sum_{k=1}^n\log \frac {\int \varphi(w)p(Y_k|X_k,w)\prod_{i\ne k} p(Y_i|X_i, w)\,dw} {\int \varphi(w)\prod_{i\ne k} p(Y_i|X_i, w)\,dw} \\ &= -2\sum_{k=1}^n \log \frac {\int \varphi(w)\prod_{i=1}^n p(Y_i|X_i, w)\,dw} {\int \varphi(w)p(Y_k|X_k,w)^{-1}\prod_{i=1}^n p(Y_i|X_i, w)\,dw} \\ &= -2\sum_{k=1}^n \log \left( \frac {\int \varphi(w)p(Y_k|X_k,w)^{-1}\prod_{i=1}^n p(Y_i|X_i, w)\,dw} {\int \varphi(w)\prod_{i=1}^n p(Y_i|X_i, w)\,dw} \right)^{-1} \\ &= -2\sum_{k=1}^n \log E_w\left[p(Y_k|X_k,w)^{-1}\right]^{-1} \\ &= 2\sum_{k=1}^n \log E_w\left[p(Y_k|X_k,w)^{-1}\right] \\ &= 2\sum_{k=1}^n \log E_w[\exp(-\log p(Y_k|X_k,w))]. \end{aligned} $$最初の等号で $\varphi^*_k(w)$ の定義を使った. 4つ目の等号で,
$$ E_w[f(w)] = \int f(w)\varphi^*(w)\,dw, \quad \varphi^*(w) = \frac {\varphi(w)\prod_{i=1}^n p(Y_i|X_i, w)} {\int \varphi(w)\prod_{i=1}^n p(Y_i|X_i, w)\,dw} $$を使い, 5つ目の等号で $\log x^{-1}=-\log x$ を使った.
上の変形の最後の式を使えば前節の記号のもとでLOOCVを次のように近似計算可能である:
$$ \operatorname{LOOCV} \approx 2\sum_{k=1}^n \log\,\operatorname{mean}\,\{\,\exp(-\log\operatorname{lp}_{k,l})\mid l=1,\ldots,L\,\}. $$"""
`loocv(lp)` returns the LOOCV from the logpdf array `lp`.
"""
function loocv(lp::AbstractArray{R, 2}) where {R}
n, L = size(lp)
2sum(logmeanexp(- @view lp[k, :]) for k in 1:n)
end
"""
`loocv(logpdf_func, data, posterior_sample)` returns the LOOCV calculated from the arguments.
"""
function loocv(
logpdf_func,
data::AbstractArray,
posterior_sample::AbstractArray)
lp = logpdf_array(logpdf_func, data, posterior_sample)
loocv(lp)
end
"""
`loocv(logpdf_func, data, chain, vars=get_vars(chain))` returns the LOOCV calculated from the arguments.
"""
function loocv(
logpdf_func,
data::AbstractArray,
chain::Turing.Inference.AbstractMCMC.AbstractChains,
vars::AbstractVector=get_vars(chain)
)
posterior_sample = get_posterior_sample(chain, vars)
loocv(logpdf_func, data, posterior_sample)
end
@doc loocv
loocv(lp)
returns the LOOCV from the logpdf array lp
.
loocv(logpdf_func, data, posterior_sample)
returns the LOOCV calculated from the arguments.
loocv(logpdf_func, data, chain, vars=get_vars(chain))
returns the LOOCV calculated from the arguments.
"""
`make_pred_pdf(logpdf_func, posterior_sample)` makes the pdf of the predictive distribution from the arguments.
"""
function make_pred_pdf(logpdf_func, posterior_sample::AbstractArray)
L = size(posterior_sample, 1)
pred_pdf(x) = mean(exp(logpdf_func(x, @view(posterior_sample[l, :]))) for l in 1:L)
pred_pdf
end
"""
`make_pred_pdf(logpdf_func, chain::AbstractChains, vars::AbstractVector)` makes the pdf of the predictive distribution from the arguments.
"""
function make_pred_pdf(
logpdf_func,
chain::Turing.Inference.AbstractMCMC.AbstractChains,
vars::AbstractVector=get_vars(chain)
)
posterior_sample = get_posterior_sample(chain, vars)
make_pred_pdf(logpdf_func, posterior_sample)
end
@doc make_pred_pdf
make_pred_pdf(logpdf_func, posterior_sample)
makes the pdf of the predictive distribution from the arguments.
make_pred_pdf(logpdf_func, chain::AbstractChains, vars::AbstractVector)
makes the pdf of the predictive distribution from the arguments.
seed!(4649)
posterior_sample_test = [5, 0] .+ 1.2randn(2, 100)
logpdf_test(x, w) = logpdf(Gamma(exp(w[1]), exp(w[2])), x[1])
pred_pdf_test = make_pred_pdf(logpdf_test, posterior_sample_test)
@show pred_pdf_test([5.0])
@show pred_pdf_test(5.0)
plot(size=(400, 250))
plot!(pred_pdf_test, 0, 50; label="pred_pdf_test")
pred_pdf_test([5.0]) = 0.01698634409454517 pred_pdf_test(5.0) = 0.01698634409454517
seed!(20200323);
@model ols(X, Y) = begin
a ~ Flat()
b ~ Flat()
σ ~ FlatPos(0.0)
for k in eachindex(X)
Y[k] ~ Normal(a + b*X[k], σ)
end
end
n = 20
X = 3rand(n)
Y = X .+ 1 .+ 0.3randn(n)
chain_ols = psample(ols(X, Y), NUTS(), 2000, 3)
function logpdf_ols(x, w)
X, Y = x
a, b, σ = w
logpdf(Normal(a + b*X, σ), Y)
end
data_ols = hcat(X, Y)
vars_ols = [:a, :b, :σ]
waic_ols = waic(logpdf_ols, data_ols, chain_ols, vars_ols)
loocv_ols = loocv(logpdf_ols, data_ols, chain_ols, vars_ols)
@show waic_ols
@show loocv_ols
@show chain_ols
plot(chain_ols; lw=0.5)
┌ Info: Found initial step size
│ ϵ = 0.05
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:08
waic_ols = (WAIC = 18.849114851565552, T = 14.08884910101609, V = 4.760265750549462) loocv_ols = 19.01539110495765 chain_ols = Object of type Chains, with data of type 1000×15×3 Array{Real,3} Iterations = 1:1000 Thinning interval = 1 Chains = 1, 2, 3 Samples per chain = 1000 internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth parameters = a, b, σ 2-element Array{ChainDataFrame,1} Summary Statistics parameters mean std naive_se mcse ess r_hat ────────── ────── ────── ──────── ────── ───────── ────── a 1.0951 0.1650 0.0030 0.0054 989.8906 1.0017 b 0.8655 0.1135 0.0021 0.0033 1102.7093 1.0009 σ 0.3785 0.0680 0.0012 0.0015 1534.5884 0.9995 Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% ────────── ────── ────── ────── ────── ────── a 0.7706 0.9880 1.0940 1.2039 1.4164 b 0.6406 0.7917 0.8659 0.9376 1.0872 σ 0.2752 0.3302 0.3691 0.4158 0.5289
posterior_sample_ols = get_posterior_sample(chain_ols, vars_ols)
a, b, σ = mapslices(median, posterior_sample_ols, dims=1)
@show a, b, σ
lp = logpdf_array(logpdf_ols, data_ols, posterior_sample_ols)
@show T = training_error(lp)
@show V = functional_variance(lp)
@show WAIC = T + V
@show LOOCV = loocv(lp)
relax()
display(waic(lp))
X, Y = data_ols[:,1], data_ols[:,2]
Phi = hcat(ones(length(X)), X)
a, b = Phi\Y
σ² = mean((Y - Phi*[a,b]).^2)
@show a, b, √σ²
T = -2sum(logpdf(Normal(Phi[k,:]'*[a,b], √σ²), Y[k]) for k in 1:length(X))
nparams = 3
@show T
@show 2nparams
@show AIC = T + 2nparams
relax()
(AIC=AIC, T=T, V=2nparams) |> display
(a, b, σ) = (1.0939559289468321, 0.8659305919345646, 0.3690821664936669) T = training_error(lp) = 14.08884910101609 V = functional_variance(lp) = 4.760265750549462 WAIC = T + V = 18.849114851565552 LOOCV = loocv(lp) = 19.01539110495765
(WAIC = 18.849114851565552, T = 14.08884910101609, V = 4.760265750549462)
(a, b, √σ²) = (1.0858106841809112, 0.8718885100467602, 0.33313315445256647) T = 12.78902109999629 2nparams = 6 AIC = T + 2nparams = 18.78902109999629
(AIC = 18.78902109999629, T = 12.78902109999629, V = 6)
plot(size=(400, 250))
scatter!(X, Y; label="sample")
plot!(x -> x + 1, -0.2, 3.2; label="x+1")
plot!(x -> x + 1 + 2*0.3, -0.2, 3.2; label="±2σ", color=:black, ls=:dot)
plot!(x -> x + 1 - 2*0.3, -0.2, 3.2; label="", color=:black, ls=:dot)
pred_pdf_ols = make_pred_pdf(logpdf_ols, posterior_sample_ols)
x = range(-0.5, 3.5; length=100)
y = range(0.5, 4.5; length=100)
@time z = ((x, y) -> pred_pdf_ols((x, y))).(x', y)
P1 = plot(size=(400, 400), legend=false)
plot!(xtick=-10:10, ytick=-10:10)
plot!(xlim=extrema(x), ylim=extrema(y))
heatmap!(x, y, z)
scatter!(X, Y; label="sample", color=:cyan)
x = range(-7, 10; length=100)
y = range(-6, 11; length=100)
@time z = ((x, y) -> pred_pdf_ols((x, y))).(x', y)
P2 = plot(size=(400, 400), legend=false)
plot!(xtick=-10:2:10, ytick=-10:2:10)
plot!(xlim=extrema(x), ylim=extrema(y))
heatmap!(x, y, z)
scatter!(X, Y; label="sample", color=:cyan)
plot(P1, P2; size=(800, 400), layout=(1, 2))
3.462194 seconds (30.62 M allocations: 1.371 GiB, 12.37% gc time) 3.145276 seconds (30.26 M allocations: 1.352 GiB, 12.28% gc time)
normal_dist(a, b) = Normal(a, exp(b))
logpdf_normal(x, w) = logpdf(normal_dist(w[1], w[2]), x[1])
@doc Normal
Normal(μ,σ)
The Normal distribution with mean μ
and standard deviation σ≥0
has probability density function
Note that if σ == 0
, then the distribution is a point mass concentrated at μ
. Though not technically a continuous distribution, it is allowed so as to account for cases where σ
may have underflowed, and the functions are defined by taking the pointwise limit as $σ → 0$.
Normal() # standard Normal distribution with zero mean and unit variance
Normal(mu) # Normal distribution with mean mu and unit variance
Normal(mu, sig) # Normal distribution with mean mu and variance sig^2
params(d) # Get the parameters, i.e. (mu, sig)
mean(d) # Get the mean, i.e. mu
std(d) # Get the standard deviation, i.e. sig
External links
gamma_dist(a, b) = Gamma(exp(a), exp(b))
logpdf_gamma(x, w) = logpdf(gamma_dist(w[1], w[2]), x[1])
@doc Gamma
Gamma(α,θ)
The Gamma distribution with shape parameter α
and scale θ
has probability density function
Gamma() # Gamma distribution with unit shape and unit scale, i.e. Gamma(1, 1)
Gamma(α) # Gamma distribution with shape α and unit scale, i.e. Gamma(α, 1)
Gamma(α, θ) # Gamma distribution with shape α and scale θ
params(d) # Get the parameters, i.e. (α, θ)
shape(d) # Get the shape parameter, i.e. α
scale(d) # Get the scale parameter, i.e. θ
External links
inversegamma_dist(a, b) = InverseGamma(exp(a), exp(b))
logpdf_inversegamma(x, w) = logpdf(inversegamma_dist(w[1], w[2]), x[1])
@doc InverseGamma
InverseGamma(α, θ)
The inverse Gamma distribution with shape parameter α
and scale θ
has probability density function
It is related to the Gamma
distribution: if $X \sim \operatorname{Gamma}(\alpha, \beta)$, then $1 / X \sim \operatorname{InverseGamma}(\alpha, \beta^{-1})$.
InverseGamma() # Inverse Gamma distribution with unit shape and unit scale, i.e. InverseGamma(1, 1)
InverseGamma(α) # Inverse Gamma distribution with shape α and unit scale, i.e. InverseGamma(α, 1)
InverseGamma(α, θ) # Inverse Gamma distribution with shape α and scale θ
params(d) # Get the parameters, i.e. (α, θ)
shape(d) # Get the shape parameter, i.e. α
scale(d) # Get the scale parameter, i.e. θ
External links
lognormal_dist(a, b) = LogNormal(a, exp(b))
logpdf_lognormal(x, w) = logpdf(lognormal_dist(w[1], w[2]), x[1])
@doc LogNormal
LogNormal(μ,σ)
The log normal distribution is the distribution of the exponential of a Normal
variate: if $X \sim \operatorname{Normal}(\mu, \sigma)$ then $\exp(X) \sim \operatorname{LogNormal}(\mu,\sigma)$. The probability density function is
LogNormal() # Log-normal distribution with zero log-mean and unit scale
LogNormal(mu) # Log-normal distribution with log-mean mu and unit scale
LogNormal(mu, sig) # Log-normal distribution with log-mean mu and scale sig
params(d) # Get the parameters, i.e. (mu, sig)
meanlogx(d) # Get the mean of log(X), i.e. mu
varlogx(d) # Get the variance of log(X), i.e. sig^2
stdlogx(d) # Get the standard deviation of log(X), i.e. sig
External links
weibull_dist(a, b) = Weibull(exp(a), exp(b))
logpdf_weibull(x, w) = logpdf(weibull_dist(w[1], w[2]), x[1])
@doc Weibull
Weibull(α,θ)
The Weibull distribution with shape α
and scale θ
has probability density function
Weibull() # Weibull distribution with unit shape and unit scale, i.e. Weibull(1, 1)
Weibull(a) # Weibull distribution with shape a and unit scale, i.e. Weibull(a, 1)
Weibull(a, b) # Weibull distribution with shape a and scale b
params(d) # Get the parameters, i.e. (a, b)
shape(d) # Get the shape parameter, i.e. a
scale(d) # Get the scale parameter, i.e. b
External links
@show dist_true = weibull_dist(1.5, 1/10)
seed!(4649)
n = 1000
X = rand(dist_true, n)
plot(size=(500, 350), legendfontsize=10)
histogram!(X; norm=true, alpha=0.3, label="sample")
plot!(dist_true, 0, 2; label="dist_true")
dist_true = weibull_dist(1.5, 1 / 10) = Weibull{Float64}(α=4.4816890703380645, θ=1.1051709180756477)
@model testmodel(X, dist_func) = begin
a ~ Flat()
b ~ Flat()
X .~ dist_func(a, b)
end
##testmodel#430 (generic function with 2 methods)
chain_normal = psample(testmodel(X, normal_dist), NUTS(), 2000, 3)
waic_normal = waic(logpdf_normal, X, chain_normal)
@show waic_normal
loocv_normal = loocv(logpdf_normal, X, chain_normal)
@show loocv_normal
println()
@show chain_normal
plot(chain_normal; lw=0.5)
┌ Info: Found initial step size
│ ϵ = 0.05
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:05
waic_normal = (WAIC = 143.7520736219336, T = 139.9459413074622, V = 3.8061323144714003) loocv_normal = 143.75090489485888 chain_normal = Object of type Chains, with data of type 1000×14×3 Array{Real,3} Iterations = 1:1000 Thinning interval = 1 Chains = 1, 2, 3 Samples per chain = 1000 internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth parameters = a, b 2-element Array{ChainDataFrame,1} Summary Statistics parameters mean std naive_se mcse ess r_hat ────────── ─────── ────── ──────── ────── ───────── ────── a 1.0024 0.0082 0.0001 0.0001 3076.7545 1.0005 b -1.3479 0.0220 0.0004 0.0004 3105.4581 0.9995 Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% ────────── ─────── ─────── ─────── ─────── ─────── a 0.9862 0.9969 1.0023 1.0078 1.0196 b -1.3910 -1.3631 -1.3472 -1.3328 -1.3052
chain_gamma = psample(testmodel(X, gamma_dist), NUTS(), 2000, 3)
waic_gamma = waic(logpdf_gamma, X, chain_gamma)
@show waic_gamma
loocv_gamma = loocv(logpdf_gamma, X, chain_gamma)
@show loocv_gamma
println()
@show chain_gamma
plot(chain_gamma; lw=0.5)
┌ Info: Found initial step size
│ ϵ = 0.025
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:14
waic_gamma = (WAIC = 242.55619795660647, T = 237.50715503275975, V = 5.049042923846727) loocv_gamma = 242.55334597773796 chain_gamma = Object of type Chains, with data of type 1000×14×3 Array{Real,3} Iterations = 1:1000 Thinning interval = 1 Chains = 1, 2, 3 Samples per chain = 1000 internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth parameters = a, b 2-element Array{ChainDataFrame,1} Summary Statistics parameters mean std naive_se mcse ess r_hat ────────── ─────── ────── ──────── ────── ──────── ────── a 2.5501 0.0433 0.0008 0.0018 714.8608 1.0022 b -2.5479 0.0443 0.0008 0.0019 715.1871 1.0024 Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% ────────── ─────── ─────── ─────── ─────── ─────── a 2.4626 2.5220 2.5504 2.5789 2.6387 b -2.6377 -2.5777 -2.5478 -2.5187 -2.4582
chain_inversegamma = psample(testmodel(X, inversegamma_dist), NUTS(), 2000, 3)
waic_inversegamma = waic(logpdf_inversegamma, X, chain_inversegamma)
@show waic_inversegamma
loocv_inversegamma = loocv(logpdf_inversegamma, X, chain_inversegamma)
@show loocv_inversegamma
println()
@show chain_inversegamma
plot(chain_inversegamma; lw=0.5)
┌ Info: Found initial step size
│ ϵ = 0.025
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:10
waic_inversegamma = (WAIC = 501.0669809819272, T = 491.5322296725455, V = 9.534751309381688) loocv_inversegamma = 501.0250547819082 chain_inversegamma = Object of type Chains, with data of type 1000×14×3 Array{Real,3} Iterations = 1:1000 Thinning interval = 1 Chains = 1, 2, 3 Samples per chain = 1000 internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth parameters = a, b 2-element Array{ChainDataFrame,1} Summary Statistics parameters mean std naive_se mcse ess r_hat ────────── ────── ────── ──────── ────── ──────── ────── a 2.3042 0.0427 0.0008 0.0016 740.5161 1.0009 b 2.2160 0.0435 0.0008 0.0016 748.1955 1.0011 Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% ────────── ────── ────── ────── ────── ────── a 2.2236 2.2742 2.3035 2.3347 2.3868 b 2.1303 2.1863 2.2144 2.2479 2.2996
chain_lognormal = psample(testmodel(X, lognormal_dist), NUTS(), 2000, 3)
waic_lognormal = waic(logpdf_lognormal, X, chain_lognormal)
@show waic_lognormal
loocv_lognormal = loocv(logpdf_lognormal, X, chain_lognormal)
@show loocv_lognormal
println()
@show chain_lognormal
plot(chain_lognormal; lw=0.5)
┌ Info: Found initial step size
│ ϵ = 0.025
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:03
waic_lognormal = (WAIC = 343.79099285707605, T = 336.58010836584594, V = 7.2108844912300984) loocv_lognormal = 343.78791583887903 chain_lognormal = Object of type Chains, with data of type 1000×14×3 Array{Real,3} Iterations = 1:1000 Thinning interval = 1 Chains = 1, 2, 3 Samples per chain = 1000 internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth parameters = a, b 2-element Array{ChainDataFrame,1} Summary Statistics parameters mean std naive_se mcse ess r_hat ────────── ─────── ────── ──────── ────── ───────── ────── a -0.0372 0.0094 0.0002 0.0002 3244.1229 0.9999 b -1.2116 0.0231 0.0004 0.0004 3111.7774 0.9992 Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% ────────── ─────── ─────── ─────── ─────── ─────── a -0.0550 -0.0435 -0.0371 -0.0307 -0.0195 b -1.2558 -1.2275 -1.2116 -1.1961 -1.1667
chain_weibull = psample(testmodel(X, weibull_dist), NUTS(), 2000, 3)
waic_weibull = waic(logpdf_weibull, X, chain_weibull)
@show waic_weibull
loocv_weibull = loocv(logpdf_weibull, X, chain_weibull)
@show loocv_weibull
println()
@show chain_weibull
plot(chain_weibull; lw=0.5)
┌ Info: Found initial step size
│ ϵ = 0.05
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
┌ Warning: The current proposal will be rejected due to numerical error(s).
│ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC C:\Users\genkuroki\.julia\packages\AdvancedHMC\haUrH\src\hamiltonian.jl:44
┌ Warning: The current proposal will be rejected due to numerical error(s).
│ isfinite.((θ, r, ℓπ, ℓκ)) = (true, true, true, false)
└ @ AdvancedHMC C:\Users\genkuroki\.julia\packages\AdvancedHMC\haUrH\src\hamiltonian.jl:44
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:07
waic_weibull = (WAIC = 135.8190696247778, T = 131.69550370400378, V = 4.123565920773999) loocv_weibull = 135.81696106444352 chain_weibull = Object of type Chains, with data of type 1000×14×3 Array{Real,3} Iterations = 1:1000 Thinning interval = 1 Chains = 1, 2, 3 Samples per chain = 1000 internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth parameters = a, b 2-element Array{ChainDataFrame,1} Summary Statistics parameters mean std naive_se mcse ess r_hat ────────── ────── ────── ──────── ────── ───────── ────── a 1.4748 0.0252 0.0005 0.0006 2326.7643 0.9993 b 0.0951 0.0076 0.0001 0.0002 2044.4410 0.9997 Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% ────────── ────── ────── ────── ────── ────── a 1.4247 1.4576 1.4744 1.4924 1.5230 b 0.0799 0.0899 0.0953 0.1002 0.1094
@show waic_normal |> mrd
@show waic_gamma |> mrd
@show waic_inversegamma |> mrd
@show waic_lognormal |> mrd
@show waic_weibull |> mrd
println()
@show loocv_normal |> mrd
@show loocv_gamma |> mrd
@show loocv_inversegamma |> mrd
@show loocv_lognormal |> mrd
@show loocv_weibull |> mrd
;
waic_normal |> mrd = (WAIC = 143.752, T = 139.946, V = 3.806) waic_gamma |> mrd = (WAIC = 242.556, T = 237.507, V = 5.049) waic_inversegamma |> mrd = (WAIC = 501.067, T = 491.532, V = 9.535) waic_lognormal |> mrd = (WAIC = 343.791, T = 336.58, V = 7.211) waic_weibull |> mrd = (WAIC = 135.819, T = 131.696, V = 4.124) loocv_normal |> mrd = 143.751 loocv_gamma |> mrd = 242.553 loocv_inversegamma |> mrd = 501.025 loocv_lognormal |> mrd = 343.788 loocv_weibull |> mrd = 135.817
pred_normal = make_pred_pdf(logpdf_normal, chain_normal)
pred_gamma = make_pred_pdf(logpdf_gamma, chain_gamma)
pred_inversegamma = make_pred_pdf(logpdf_inversegamma, chain_inversegamma)
pred_lognormal = make_pred_pdf(logpdf_lognormal, chain_lognormal)
pred_weibull = make_pred_pdf(logpdf_weibull, chain_weibull)
plot(legendfontsize=10)
histogram!(X; norm=true, alpha=0.2, label="sample")
plot!(x -> pred_weibull([x]), 0, 2.5; label="weibull", ls=:dash, lw=1.5)
plot!(x -> pred_normal([x]), 0, 2.5; label="normal", ls=:dashdot, lw=1.2)
plot!(x -> pred_gamma([x]), 0, 2.5; label="gamma", ls=:dot, lw=1.2)
plot!(x -> pred_inversegamma([x]), 0, 2.5; label="inversegamma", ls=:dot, lw=1.2)
plot!(x -> pred_lognormal([x]), 0, 2.5; label="lognormal", ls=:dot, lw=1.2)
plot!(dist_true, 0, 2.5; label="dist_true", color=:blue, alpha=0.7, lw=0.8)
tStartExposure = [-2, -18, -18, 10, 3, 8, 12, 13, -18, -18, 8, 10, -11, -18, -18, -18, 12, -18, -18, -18, -18, -18, -18, 11, 11, -18, -18, -18, -18, -18, 12, 12, 15, 15, -18, -18, -18, -18, 19, -18, -18, -18, -18, -18, -18, -18, 18, -18, -18, 6, -18, -18, -18, 9, -18, 11, -18, -18, -18, -18, -18, -18, -18, -18, -18, -18, 19, -18, -18, -18, -18, -18, -18, -18, 13, 13, -18, -18, -18, -18, -18, -18, 13, -18, -18, -18, -18, -18]
tEndExposure = [3, 12, 3, 11, 4, 16, 16, 16, 15, 15, 16, 11, 9, 2, 12, 17, 15, 17, 18, 13, 16, 11, 18, 14, 18, 20, 12, 10, 17, 15, 15, 14, 17, 20, 18, 13, 23, 23, 22, 20, 21, 21, 21, 15, 21, 17, 20, 18, 20, 7, 20, 20, 20, 20, 18, 22, 22, 18, 18, 18, 9, 20, 18, 22, 23, 19, 19, 22, 22, 13, 22, 22, 23, 23, 17, 17, 22, 18, 18, 22, 18, 20, 15, 20, 19, 23, 22, 22]
tSymptomOnset = [3, 15, 4, 14, 9, 16, 16, 16, 16, 16, 16, 14, 10, 10, 14, 20, 19, 19, 20, 20, 17, 13, 19, 19, 20, 21, 18, 18, 18, 16, 20, 16, 20, 20, 19, 17, 24, 24, 23, 21, 23, 23, 23, 21, 22, 24, 24, 19, 21, 14, 23, 23, 21, 20, 21, 22, 23, 19, 23, 21, 13, 22, 24, 25, 25, 25, 25, 24, 24, 21, 23, 23, 24, 25, 18, 18, 23, 22, 22, 24, 24, 25, 22, 25, 25, 24, 25, 25]
@show d = length(tStartExposure)
hcat(tStartExposure, tEndExposure, tSymptomOnset)'
d = length(tStartExposure) = 88
3×88 Adjoint{Int64,Array{Int64,2}}: -2 -18 -18 10 3 8 12 13 -18 … -18 13 -18 -18 -18 -18 -18 3 12 3 11 4 16 16 16 15 20 15 20 19 23 22 22 3 15 4 14 9 16 16 16 16 25 22 25 25 24 25 25
data_ip = hcat(tStartExposure', tEndExposure', tSymptomOnset')
size(data_ip)
(1, 264)
@model incubation_period(
dist_func,
tStartExposure,
tEndExposure,
tSymptomOnset,
::Type{VT}=Vector{Float64}
) where {VT} = begin
n = length(tStartExposure)
uE = VT(undef, n)
a ~ Flat()
b ~ Flat()
uE .~ Uniform()
for k in 1:n
tE = tStartExposure[k] + uE[k]*(tEndExposure[k] - tStartExposure[k])
tSymptomOnset[k] ~ LocationScale(tE, 1.0, dist_func(a, b))
end
end
model_ip(dist_func) = incubation_period(
dist_func,
tStartExposure,
tEndExposure,
tSymptomOnset
)
function logpdf_ip(x, w; dist_func=dist_func, d=length(tStartExposure))
a = w[1]
b = w[2]
uE = @view w[3:end]
sum(logpdf(dist_func(a, b), x[2d+k] - (x[k] + uE[k]*(x[d+k]-x[k]))) for k in 1:d)
end
logpdf_ip (generic function with 1 method)
L = 1000 + 1000
nchains = 3
3
@time chain_ip_gamma = psample(model_ip(gamma_dist), NUTS(), L, nchains);
┌ Info: Found initial step size
│ ϵ = 0.05
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
┌ Warning: The current proposal will be rejected due to numerical error(s).
│ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC C:\Users\genkuroki\.julia\packages\AdvancedHMC\haUrH\src\hamiltonian.jl:44
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:10:44
650.187142 seconds (6.56 G allocations: 1.728 TiB, 33.46% gc time)
#chain_ip_gamma
chain_ip_gamma[["a"; "b"; ["uE[$i]" for i in 1:10:d]]]
Object of type Chains, with data of type 1000×11×3 Array{Real,3} Iterations = 1:1000 Thinning interval = 1 Chains = 1, 2, 3 Samples per chain = 1000 parameters = a, b, uE[1], uE[11], uE[21], uE[31], uE[41], uE[51], uE[61], uE[71], uE[81] 2-element Array{ChainDataFrame,1} Summary Statistics parameters mean std naive_se mcse ess r_hat ────────── ────── ────── ──────── ────── ───────── ────── a 1.7228 0.3096 0.0057 0.0117 490.9173 1.0034 b 0.1612 0.3609 0.0066 0.0140 498.1078 1.0027 uE[1] 0.2569 0.1795 0.0033 0.0029 4105.3946 1.0000 uE[11] 0.3464 0.2030 0.0037 0.0030 3747.1291 0.9998 uE[21] 0.8373 0.0864 0.0016 0.0018 3512.9622 0.9997 uE[31] 0.5359 0.2846 0.0052 0.0034 4880.1431 0.9999 uE[41] 0.8808 0.0750 0.0014 0.0013 4165.0778 0.9994 uE[51] 0.8979 0.0722 0.0013 0.0013 3986.7499 0.9997 uE[61] 0.8746 0.1018 0.0019 0.0021 3624.6169 0.9993 uE[71] 0.8592 0.0737 0.0013 0.0010 3907.3316 0.9998 uE[81] 0.9254 0.0665 0.0012 0.0012 3450.9053 1.0000 Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% ────────── ─────── ─────── ────── ────── ────── a 1.0807 1.5080 1.7376 1.9358 2.3066 b -0.4899 -0.0826 0.1401 0.3950 0.9330 uE[1] 0.0130 0.1074 0.2235 0.3778 0.6638 uE[11] 0.0238 0.1792 0.3326 0.4955 0.7566 uE[21] 0.6356 0.7891 0.8507 0.9001 0.9669 uE[31] 0.0328 0.3000 0.5520 0.7807 0.9773 uE[41] 0.7012 0.8427 0.8936 0.9337 0.9867 uE[51] 0.7201 0.8580 0.9115 0.9515 0.9941 uE[61] 0.6048 0.8271 0.8977 0.9481 0.9948 uE[71] 0.6807 0.8206 0.8703 0.9115 0.9691 uE[81] 0.7583 0.8957 0.9429 0.9750 0.9976
#@time plot(chain_ip_gamma[2:2:end]; lw=0.5, alpha=0.8)
@time plot(chain_ip_gamma[["a"; "b"; ["uE[$i]" for i in 1:10:d]]][2:2:end]; lw=0.5, alpha=0.8)
0.974425 seconds (1.76 M allocations: 81.750 MiB)
dist_func = gamma_dist
waic_ip_gamma = waic(logpdf_ip, data_ip, chain_ip_gamma)
loocv_ip_gamma = loocv(logpdf_ip, data_ip, chain_ip_gamma)
@show waic_ip_gamma
@show loocv_ip_gamma;
waic_ip_gamma = (WAIC = 828.0910756394273, T = 345.5145214862969, V = 482.5765541531305) loocv_ip_gamma = 521.7608033574322
@time chain_ip_lognormal = psample(model_ip(lognormal_dist), NUTS(), L, nchains);
┌ Info: Found initial step size
│ ϵ = 0.2
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:06:53
417.389136 seconds (3.77 G allocations: 1016.697 GiB, 36.79% gc time)
#chain_ip_lognormal
chain_ip_lognormal[["a"; "b"; ["uE[$i]" for i in 1:10:d]]]
Object of type Chains, with data of type 1000×11×3 Array{Real,3} Iterations = 1:1000 Thinning interval = 1 Chains = 1, 2, 3 Samples per chain = 1000 parameters = a, b, uE[1], uE[11], uE[21], uE[31], uE[41], uE[51], uE[61], uE[71], uE[81] 2-element Array{ChainDataFrame,1} Summary Statistics parameters mean std naive_se mcse ess r_hat ────────── ─────── ────── ──────── ────── ───────── ────── a 1.8033 0.0897 0.0016 0.0024 1689.2387 1.0019 b -0.7701 0.1789 0.0033 0.0082 539.6518 1.0031 uE[1] 0.2456 0.1710 0.0031 0.0031 3976.5064 0.9994 uE[11] 0.3554 0.1909 0.0035 0.0039 2606.2328 1.0009 uE[21] 0.8279 0.1032 0.0019 0.0022 3090.9849 1.0022 uE[31] 0.5479 0.2871 0.0052 0.0042 4505.0488 0.9994 uE[41] 0.8718 0.0974 0.0018 0.0014 3635.0052 0.9991 uE[51] 0.8880 0.0924 0.0017 0.0016 3292.2488 0.9998 uE[61] 0.8651 0.1258 0.0023 0.0023 3474.5801 0.9996 uE[71] 0.8528 0.0912 0.0017 0.0016 3408.6556 0.9994 uE[81] 0.9088 0.0938 0.0017 0.0016 3146.7634 0.9996 Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% ────────── ─────── ─────── ─────── ─────── ─────── a 1.6367 1.7426 1.8006 1.8605 1.9913 b -1.1426 -0.8925 -0.7687 -0.6450 -0.4267 uE[1] 0.0090 0.1036 0.2204 0.3621 0.6256 uE[11] 0.0239 0.2029 0.3555 0.4950 0.7188 uE[21] 0.5610 0.7859 0.8483 0.8977 0.9560 uE[31] 0.0279 0.3089 0.5691 0.8002 0.9808 uE[41] 0.6296 0.8335 0.8944 0.9357 0.9893 uE[51] 0.6458 0.8529 0.9111 0.9519 0.9920 uE[61] 0.5270 0.8214 0.8996 0.9516 0.9958 uE[71] 0.6101 0.8162 0.8735 0.9139 0.9684 uE[81] 0.6534 0.8770 0.9371 0.9718 0.9981
#@time plot(chain_ip_lognormal[2:2:end]; lw=0.5, alpha=0.8)
@time plot(chain_ip_lognormal[["a"; "b"; ["uE[$i]" for i in 1:10:d]]][2:2:end]; lw=0.5, alpha=0.8)
0.191283 seconds (498.92 k allocations: 19.126 MiB)
dist_func = lognormal_dist
waic_ip_lognormal = waic(logpdf_ip, data_ip, chain_ip_lognormal)
loocv_ip_lognormal = loocv(logpdf_ip, data_ip, chain_ip_lognormal)
@show waic_ip_lognormal
@show loocv_ip_lognormal;
waic_ip_lognormal = (WAIC = 972.3905600179592, T = 340.6357318256695, V = 631.7548281922897) loocv_ip_lognormal = 538.6737465602005
@time chain_ip_weibull = psample(model_ip(weibull_dist), NUTS(), L, nchains);
┌ Info: Found initial step size
│ ϵ = 0.003125
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
┌ Warning: The current proposal will be rejected due to numerical error(s).
│ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC C:\Users\genkuroki\.julia\packages\AdvancedHMC\haUrH\src\hamiltonian.jl:44
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:07:10
434.839251 seconds (4.23 G allocations: 1.113 TiB, 32.28% gc time)
#chain_ip_weibull
chain_ip_weibull[["a"; "b"; ["uE[$i]" for i in 1:10:d]]]
Object of type Chains, with data of type 1000×11×3 Array{Real,3} Iterations = 1:1000 Thinning interval = 1 Chains = 1, 2, 3 Samples per chain = 1000 parameters = a, b, uE[1], uE[11], uE[21], uE[31], uE[41], uE[51], uE[61], uE[71], uE[81] 2-element Array{ChainDataFrame,1} Summary Statistics parameters mean std naive_se mcse ess r_hat ────────── ────── ────── ──────── ────── ───────── ────── a 1.0667 0.1770 0.0032 0.0057 586.4088 1.0029 b 1.9737 0.0795 0.0015 0.0026 1240.8634 1.0018 uE[1] 0.2730 0.2042 0.0037 0.0032 4263.9176 0.9992 uE[11] 0.3301 0.2105 0.0038 0.0029 4085.7993 0.9992 uE[21] 0.8386 0.0723 0.0013 0.0012 3432.4125 0.9995 uE[31] 0.5108 0.2805 0.0051 0.0041 4814.8101 0.9992 uE[41] 0.8814 0.0620 0.0011 0.0012 3859.4796 0.9997 uE[51] 0.8993 0.0616 0.0011 0.0010 3319.3782 1.0010 uE[61] 0.8838 0.0797 0.0015 0.0014 3076.8869 0.9998 uE[71] 0.8619 0.0624 0.0011 0.0011 3116.6802 1.0003 uE[81] 0.9376 0.0477 0.0009 0.0009 3274.0768 1.0006 Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% ────────── ────── ────── ────── ────── ────── a 0.7096 0.9468 1.0753 1.1841 1.4162 b 1.8355 1.9207 1.9671 2.0218 2.1505 uE[1] 0.0097 0.1053 0.2315 0.4022 0.7507 uE[11] 0.0147 0.1604 0.3011 0.4751 0.7875 uE[21] 0.6821 0.7960 0.8418 0.8903 0.9644 uE[31] 0.0310 0.2739 0.5225 0.7464 0.9727 uE[41] 0.7494 0.8445 0.8861 0.9268 0.9835 uE[51] 0.7583 0.8626 0.9056 0.9466 0.9935 uE[61] 0.6962 0.8367 0.8963 0.9439 0.9935 uE[71] 0.7268 0.8246 0.8657 0.9065 0.9689 uE[81] 0.8252 0.9122 0.9471 0.9731 0.9971
#@time plot(chain_ip_weibull[2:2:end]; lw=0.5, alpha=0.8)
@time plot(chain_ip_weibull[["a"; "b"; ["uE[$i]" for i in 1:10:d]]][2:2:end]; lw=0.5, alpha=0.8)
0.252323 seconds (499.00 k allocations: 19.127 MiB, 12.07% gc time)
dist_func = weibull_dist
waic_ip_weibull = waic(logpdf_ip, data_ip, chain_ip_weibull)
loocv_ip_weibull = loocv(logpdf_ip, data_ip, chain_ip_weibull)
@show waic_ip_weibull
@show loocv_ip_weibull;
waic_ip_weibull = (WAIC = 749.4770381373378, T = 336.23477843334075, V = 413.24225970399704) loocv_ip_weibull = 495.36764410126403
@show waic_ip_gamma |> mrd
@show waic_ip_lognormal |> mrd
@show waic_ip_weibull |> mrd
println()
@show loocv_ip_gamma |> mrd
@show loocv_ip_lognormal |> mrd
@show loocv_ip_weibull |> mrd
;
waic_ip_gamma |> mrd = (WAIC = 828.091, T = 345.515, V = 482.577) waic_ip_lognormal |> mrd = (WAIC = 972.391, T = 340.636, V = 631.755) waic_ip_weibull |> mrd = (WAIC = 749.477, T = 336.235, V = 413.242) loocv_ip_gamma |> mrd = 521.761 loocv_ip_lognormal |> mrd = 538.674 loocv_ip_weibull |> mrd = 495.368
内部パラメーター tE
を数値積分で消去して計算.
data_ipr = hcat(tStartExposure, tEndExposure, tSymptomOnset)
function logpdf_ipr(x, w; dist_func=dist_func)
log(quadgk(t -> pdf(dist_func(w[1], w[2]), x[3] - (x[1] + t*(x[2] - x[1]))), 0.0, 1.0)[1])
end
vars_ipr = [:a, :b]
@show vars_ipr;
vars_ipr = [:a, :b]
dist_func = gamma_dist
@time lp_ipr_gamma = logpdf_array(logpdf_ipr, data_ipr, chain_ip_gamma, vars_ipr)
waic_ipr_gamma = waic(lp_ipr_gamma)
loocv_ipr_gamma = loocv(lp_ipr_gamma)
@show waic_ipr_gamma
@show loocv_ipr_gamma;
9.666607 seconds (19.82 M allocations: 556.247 MiB, 1.85% gc time) waic_ipr_gamma = (WAIC = 599.1654924201086, T = 595.9300712884439, V = 3.2354211316647365) loocv_ipr_gamma = 599.1791611925086
dist_func = lognormal_dist
@time lp_ipr_lognormal = logpdf_array(logpdf_ipr, data_ipr, chain_ip_lognormal, vars_ipr)
waic_ipr_lognormal = waic(lp_ipr_lognormal)
loocv_ipr_lognormal = loocv(lp_ipr_lognormal)
@show waic_ipr_lognormal
@show loocv_ipr_lognormal;
3.418319 seconds (19.02 M allocations: 464.205 MiB, 3.95% gc time) waic_ipr_lognormal = (WAIC = 599.3777169513845, T = 595.9737004437031, V = 3.404016507681404) loocv_ipr_lognormal = 599.3925406158681
dist_func = weibull_dist
@time lp_ipr_weibull = logpdf_array(logpdf_ipr, data_ipr, chain_ip_weibull, vars_ipr)
waic_ipr_weibull = waic(lp_ipr_weibull)
loocv_ipr_weibull = loocv(lp_ipr_weibull)
@show waic_ipr_weibull
@show loocv_ipr_weibull;
8.177001 seconds (18.92 M allocations: 460.334 MiB, 1.09% gc time) waic_ipr_weibull = (WAIC = 599.5004579233479, T = 596.3009392417537, V = 3.19951868159425) loocv_ipr_weibull = 599.5113998705508
@show waic_ipr_gamma |> mrd
@show waic_ipr_lognormal |> mrd
@show waic_ipr_weibull |> mrd
println()
@show loocv_ipr_gamma |> mrd
@show loocv_ipr_lognormal |> mrd
@show loocv_ipr_weibull |> mrd
;
waic_ipr_gamma |> mrd = (WAIC = 599.165, T = 595.93, V = 3.235) waic_ipr_lognormal |> mrd = (WAIC = 599.378, T = 595.974, V = 3.404) waic_ipr_weibull |> mrd = (WAIC = 599.5, T = 596.301, V = 3.2) loocv_ipr_gamma |> mrd = 599.179 loocv_ipr_lognormal |> mrd = 599.393 loocv_ipr_weibull |> mrd = 599.511
w_gamma = get_posterior_sample(chain_ip_gamma, vars_ipr)
function pdf_ipr_gamma(t)
mean(pdf(gamma_dist(w_gamma[l,1], w_gamma[l,2]), t) for l in 1:size(w_gamma, 1))
end
w_lognormal = get_posterior_sample(chain_ip_lognormal, vars_ipr)
function pdf_ipr_lognormal(t)
mean(pdf(lognormal_dist(w_lognormal[l,1], w_lognormal[l,2]), t) for l in 1:size(w_lognormal, 1))
end
w_weibull = get_posterior_sample(chain_ip_weibull, vars_ipr)
function pdf_ipr_weibull(t)
mean(pdf(weibull_dist(w_weibull[l,1], w_weibull[l,2]), t) for l in 1:size(w_weibull, 1))
end
t = range(0, 25, length=400)
plot(size=(500, 300), legendfontsize=10)
plot!(xtick=0:25)
plot!(t, pdf_ipr_gamma.(t); label="gamma", ls=:solid)
plot!(t, pdf_ipr_lognormal.(t); label="lognormal", ls=:dash)
plot!(t, pdf_ipr_weibull.(t); label="weibull", ls=:dashdot)
内部パラメーター uE
を積分で消去した結果を cdf
函数で表示して計算.
擬似確率プログラミング言語でモデルは以下のように書ける:
$$ \begin{aligned} & a \sim \operatorname{Flat}(\mathbb{R}), \\ & b \sim \operatorname{Flat}(\mathbb{R}), \\ & u_k \sim \operatorname{Uniform}(0, 1), \\ & t_{\text{onset},k} - (t_{\text{start},k} + u_k(t_{\text{end},k} - t_{\text{start},k}))\sim \operatorname{ModelDist}(a, b) \quad (k=1,\ldots,n) \end{aligned} $$ここで, $t_{\text{start},k}, t_{\text{end},k}, t_{\text{onset}, k}$ がデータで, $a, b, u_k$ がモデルのパラメータであり, $\operatorname{ModelDist}$ は Gamma, LogNormal, Weibullのどれかである.
このモデルの内部パラメータ $u_k$ を消去しよう. まず, $\operatorname{IntegratedModelDist}(t_{\text{onset}}|a, b, t_{\text{start}}, t_{\text{end}})$ を
$$ \begin{aligned} & p_{\operatorname{IntegratedModelDist}}(t_{\text{onset}}|t_{\text{start}},t_{\text{end}},a,b) \\ & = \int_0^1 p_{\operatorname{ModelDist}} (t_{\text{onset}} - (t_{\text{start}} + u(t_{\text{end}} - t_{\text{start}}))|a,b) \,du \\ & = \begin{cases} p_{\operatorname{ModelDist}} (t_{\text{onset}} - t_{\text{start}}|a,b) & (t_{\text{end}} = t_{\text{start}}) \\ \dfrac { F_{\operatorname{ModelDist}}(t_{\text{onset}} - t_{\text{end}}|a,b) - F_{\operatorname{ModelDist}}(t_{\text{onset}} - t_{\text{start}}|a,b) } {-t_{\text{end}} + t_{\text{start}}} & (\text{otherwise}) \end{cases} \end{aligned} $$と定義する. ここで $F$ は累積分布函数を表す. 積分によって $u$ を削除したモデルは以下のように書ける:
$$ \begin{aligned} & a \sim \operatorname{Flat}(\mathbb{R}), \\ & b \sim \operatorname{Flat}(\mathbb{R}), \\ & t_{\text{onset},k} \sim \operatorname{IntegratedModelDist}(t_{\text{start},k}, t_{\text{end},k}, a, b) \quad (k=1,\ldots,n) \end{aligned} $$このパラメータが2個のモデルとして, WAICやLOOCVは計算されなければいけない.
data_ip2 = hcat(tStartExposure, tEndExposure, tSymptomOnset)
function logpdf_ip2(x, w; dist_func=dist_func)
z = if x[1] == x[2]
pdf(dist_func(w[1], w[2]), x[3] - x[1])
else
(cdf(dist_func(w[1], w[2]), x[3] - x[2]) - cdf(dist_func(w[1], w[2]), x[3] - x[1]))/(-x[2] + x[1])
end
log(z)
end
vars_ip2 = [:a, :b]
@show vars_ip2;
vars_ip2 = [:a, :b]
k, l = 67, 1234
println((k = k, data_ip2_k_1 = data_ip2[k,1], data_ip2_k_2 = data_ip2[k,2]))
dist_func = gamma_dist
ps_gamma = get_posterior_sample(chain_ip_gamma, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_gamma[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_gamma[l,:])
dist_func = lognormal_dist
ps_gamma = get_posterior_sample(chain_ip_lognormal, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_gamma[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_gamma[l,:])
dist_func = weibull_dist
ps_gamma = get_posterior_sample(chain_ip_weibull, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_gamma[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_gamma[l,:])
;
(k = 67, data_ip2_k_1 = 19, data_ip2_k_2 = 19) logpdf_ipr(data_ip2[k, :], ps_gamma[l, :]) = -2.0502878321516813 logpdf_ip2(data_ip2[k, :], ps_gamma[l, :]) = -2.0502878321516813 logpdf_ipr(data_ip2[k, :], ps_gamma[l, :]) = -2.057468864082485 logpdf_ip2(data_ip2[k, :], ps_gamma[l, :]) = -2.057468864082485 logpdf_ipr(data_ip2[k, :], ps_gamma[l, :]) = -2.0894116910323914 logpdf_ip2(data_ip2[k, :], ps_gamma[l, :]) = -2.0894116910323914
k, l = 10, 1234
println((k = k, data_ip2_k_1 = data_ip2[k,1], data_ip2_k_2 = data_ip2[k,2]))
dist_func = gamma_dist
ps_gamma = get_posterior_sample(chain_ip_gamma, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_gamma[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_gamma[l,:])
dist_func = lognormal_dist
ps_lognormal = get_posterior_sample(chain_ip_lognormal, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_lognormal[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_lognormal[l,:])
dist_func = weibull_dist
ps_weibull = get_posterior_sample(chain_ip_weibull, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_weibull[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_weibull[l,:])
;
(k = 10, data_ip2_k_1 = -18, data_ip2_k_2 = 15) logpdf_ipr(data_ip2[k, :], ps_gamma[l, :]) = -3.497768038056459 logpdf_ip2(data_ip2[k, :], ps_gamma[l, :]) = -3.4977680380564586 logpdf_ipr(data_ip2[k, :], ps_lognormal[l, :]) = -3.4972477778287336 logpdf_ip2(data_ip2[k, :], ps_lognormal[l, :]) = -3.4972477778287336 logpdf_ipr(data_ip2[k, :], ps_weibull[l, :]) = -3.5045217380470794 logpdf_ip2(data_ip2[k, :], ps_weibull[l, :]) = -3.50452173804708
dist_func = gamma_dist
@time lp_ip2_gamma = logpdf_array(logpdf_ip2, data_ip2, chain_ip_gamma, vars_ip2)
waic_ip2_gamma = waic(lp_ip2_gamma)
loocv_ip2_gamma = loocv(lp_ip2_gamma)
@show waic_ip2_gamma
@show loocv_ip2_gamma;
0.467945 seconds (934.50 k allocations: 35.991 MiB, 8.06% gc time) waic_ip2_gamma = (WAIC = 599.1654924201018, T = 595.9300712884362, V = 3.235421131665539) loocv_ip2_gamma = 599.1791611925016
dist_func = lognormal_dist
@time lp_ip2_lognormal = logpdf_array(logpdf_ip2, data_ip2, chain_ip_lognormal, vars_ip2)
waic_ip2_lognormal = waic(lp_ip2_lognormal)
loocv_ip2_lognormal = loocv(lp_ip2_lognormal)
@show waic_ip2_lognormal
@show loocv_ip2_lognormal;
0.130799 seconds (867.63 k allocations: 32.887 MiB) waic_ip2_lognormal = (WAIC = 599.3777169514955, T = 595.9737004438692, V = 3.404016507626242) loocv_ip2_lognormal = 599.3925406159791
dist_func = weibull_dist
@time lp_ip2_weibull = logpdf_array(logpdf_ip2, data_ip2, chain_ip_weibull, vars_ip2)
waic_ip2_weibull = waic(lp_ip2_weibull)
loocv_ip2_weibull = loocv(lp_ip2_weibull)
@show waic_ip2_weibull
@show loocv_ip2_weibull;
0.170888 seconds (867.16 k allocations: 32.866 MiB) waic_ip2_weibull = (WAIC = 599.5004579231576, T = 596.3009392415395, V = 3.1995186816180534) loocv_ip2_weibull = 599.5113998703594
@show waic_ip2_gamma |> mrd
@show waic_ip2_lognormal |> mrd
@show waic_ip2_weibull |> mrd
println()
@show loocv_ip2_gamma |> mrd
@show loocv_ip2_lognormal |> mrd
@show loocv_ip2_weibull |> mrd
;
waic_ip2_gamma |> mrd = (WAIC = 599.165, T = 595.93, V = 3.235) waic_ip2_lognormal |> mrd = (WAIC = 599.378, T = 595.974, V = 3.404) waic_ip2_weibull |> mrd = (WAIC = 599.5, T = 596.301, V = 3.2) loocv_ip2_gamma |> mrd = 599.179 loocv_ip2_lognormal |> mrd = 599.393 loocv_ip2_weibull |> mrd = 599.511
計算速度が圧倒的に速くなっていてかつ, 以下に再掲する数値積分(quadgk函数)を使った計算の結果(既出)と値が一致している.
@show waic_ipr_gamma |> mrd
@show waic_ipr_lognormal |> mrd
@show waic_ipr_weibull |> mrd
println()
@show loocv_ipr_gamma |> mrd
@show loocv_ipr_lognormal |> mrd
@show loocv_ipr_weibull |> mrd
;
waic_ipr_gamma |> mrd = (WAIC = 599.165, T = 595.93, V = 3.235) waic_ipr_lognormal |> mrd = (WAIC = 599.378, T = 595.974, V = 3.404) waic_ipr_weibull |> mrd = (WAIC = 599.5, T = 596.301, V = 3.2) loocv_ipr_gamma |> mrd = 599.179 loocv_ipr_lognormal |> mrd = 599.393 loocv_ipr_weibull |> mrd = 599.511