Notebook

TuringでWAICやLOOCVを計算する方法¶

黒木玄

2020-03-23～2020-03-25

ENV["LINES"] = 100

@time using Random: seed!
@time using LinearAlgebra
@time using StatsBase
@time using QuadGK

@time using StatsFuns
logmeanexp(x) = logsumexp(x) - log(length(x))

@time using StatsPlots
@time pyplot()
relax(t=0.2) = (backend() == Plots.PyPlotBackend() && PyPlot.clf(); sleep(t))

rd(x, d=3) = round(x; digits=d)
mrd(t, d=3) = map(x->rd(x, d), t)

@time using Turing
#turnprogress(false);

  0.001326 seconds (1.35 k allocations: 72.297 KiB)
  0.001262 seconds (1.35 k allocations: 73.062 KiB)
  0.421434 seconds (492.48 k allocations: 27.323 MiB)
  0.036551 seconds (52.66 k allocations: 2.846 MiB)
  3.161826 seconds (6.80 M allocations: 319.277 MiB, 7.01% gc time)
 18.629972 seconds (23.55 M allocations: 1.206 GiB, 2.71% gc time)
  8.932715 seconds (17.96 M allocations: 863.120 MiB, 3.12% gc time)
 42.442802 seconds (74.43 M allocations: 4.493 GiB, 3.52% gc time)

WAICやLOOCVを計算する函数の定義¶

Turingが生成するchainから事後分布のサンプルを抽出¶

この節の内容についてはTuring公式ドキュメントの [https://turing.ml/dev/docs/using-turing/guide#access-values-inside-chain](Access Values inside Chain) を参照せよ.

Turingを使って,

chain = psample(model, NUTS(), 2000, 3)

のようにして作った chain から名前が vars = [:a, :b, :c] のパラメータの事後分布のサンプルを抜き出すには

val = chain[vars].value

として, AxisArray val を作って,

W = vcat(val[:,:,m] for m in 1:size(val, 3))...)

と縦に連結すればよい. W は Array{Real, 2} 型になる. さらに Real を W の成分の型に直すために

posterior_sample = Array{typeof(W[end,end]), 2}(W)

を実行する. これによって, 縦に長く, 横方向に :a, :b, :c の順に並んだ配列ができる.

補足:

vars はSymbolではなく, Stringの配列でもよい.
w[1],...,w[10] のすべてを取り出したければ vars = [:w] でよい.
そのうちの w[1],...,w[5] を取り出したければ vars = ["w[$i]" for i in 1:5] とする.

In [2]:

"""
`get_posterior(chain::AbstractChains, vars::vars)` extracts the posterior sample of `vars` from `chain`.

Example:

```julia
chain = psample(model, NUTS(), 2000, 3)
posterior_sample = get_posterior_sample(chain, [:a, :b, :s])
```
"""
function get_posterior_sample(
        chain::Turing.Inference.AbstractMCMC.AbstractChains,
        vars::AbstractVector=get_vars(chain)
    )
    val = chain[vars].value
    W = vcat((val[:,:,m] for m in 1:size(val, 3))...)
    Array{typeof(W[end,end]), 2}(W)
end

"""
`get_vars(chain::AbstractChains)` returns the array of the names of the parameters in `chain`.
"""
function get_vars(chain::Turing.Inference.AbstractMCMC.AbstractChains)
    vars = chain.name_map[:parameters]
end

"""
`get_vars(model::AbstractModel)` returns the array of the names of the parameters in `model`.
"""
function get_vars(model::Turing.Inference.AbstractMCMC.AbstractModel)
    vi = DynamicPPL.VarInfo(linreg)
    vars = collect(keys(DynamicPPL.tonamedtuple(vi)))
end

function get_posterior_sample(
        chain::Turing.Inference.AbstractMCMC.AbstractChains,
        model::Turing.Inference.AbstractMCMC.AbstractModel
    )
    get_posterior_sample(chain, get_vars(model))
end

display(@doc get_posterior_sample)
display(@doc get_vars)

get_posterior(chain::AbstractChains, vars::vars) extracts the posterior sample of vars from chain.

Example:

chain = psample(model, NUTS(), 2000, 3)
posterior_sample = get_posterior_sample(chain, [:a, :b, :s])

get_vars(chain::AbstractChains) returns the array of the names of the parameters in chain.

get_vars(model::AbstractModel) returns the array of the names of the parameters in model.

WAICを計算する函数¶

データを $(X_k, Y_k)$ ($k=1,\ldots,n$) と書き, 確率モデルを $p(y|x,w)$ と書き, パラメーター $w$ に関する事後分布に関する平均と分散をそれぞれ $E_w[\;]$, $V_w[\;]$ と書くとき, WAICの定義は以下のように書ける:

$$ \begin{aligned} & T = -2\sum_{k=1}^n \log E_w[p(Y_k|X_k, w)], \\ & V = 2\sum_{k=1}^n V_w[\log p(Y_k|X_k, w)], \\ & \operatorname{WAIC} = T + V. \end{aligned} $$

$p(Y_k|X_k,w)$ は $\exp(\log p(Y_k|X_k,w))$ と書けるので、$\log p(Y_k|X_k, w)$ からWAICは計算可能である. パラメーター $w$ に関する事後分布のサンプルを $w_l$ ($l=1,\ldots,L$) と書くとき, 上の $T$, $V$ は以下のように近似計算される:

$$ \begin{aligned} & T \approx -2\sum_{k=1}^n \log\,\operatorname{mean}\,\{\,\exp(\operatorname{lp}_{k,l})\mid l=1,\ldots,L\,\}, \\ & V \approx 2\sum_{k=1}^n \operatorname{var}\,\{\, \operatorname{lp}_{k,l}\mid l=1,\ldots,L\,\}. \end{aligned} $$

ここで $\operatorname{mean}$ と $\operatorname{var}$ はそれぞれ標本平均と不偏分散を計算する函数であるとし,

$$ \operatorname{lp}_{k,l} = \log p(Y_k|X_k, w_l) $$

とおいた. 以下のセルではこれをそのままJuliaのコードに翻訳する.

In [3]:

"""
`logpdf_array(logpdf_func, data, posterior_sample)` returns the array `lp` definef by

```julia
lp[k, l] = logpdf_func(data[k, :], posterior_sample[l, :])
```
"""
function logpdf_array(
        logpdf_func,
        data::AbstractArray,
        posterior_sample::AbstractArray
    )
    n = size(data, 1)
    L = size(posterior_sample, 1)
    lp = Array{eltype(posterior_sample), 2}(undef, n, L)
    for k in 1:n, l in 1:L
        @views lp[k, l] = logpdf_func(data[k, :], posterior_sample[l, :])
    end
    lp
end

"""
`logpdf_array(logpdf_func, data, chain::AbstractChains, vars=get_vars(chain))` returns the array `lp` definef by

```julia
posterior_sample = get_posterior_sample(chain, vars)
lp[k, l] = logpdf_func(data[k, :], posterior_sample[l, :])
```
"""
function logpdf_array(
        logpdf_func,
        data::AbstractArray,
        chain::Turing.Inference.AbstractMCMC.AbstractChains,
        vars::AbstractVector=get_vars(chain)
    )
    posterior_sample = get_posterior_sample(chain, vars)
    logpdf_array(logpdf_func, data, posterior_sample)
end

"""
`training_error(lp)` returns the training error calculated from the logpdf array `lp`.
"""
function training_error(lp::AbstractArray{R, 2}) where {R}
    n, L = size(lp)
    -2sum(logmeanexp(@view lp[k, :]) for k in 1:n)
end

"""
`functional_variance(lp)` returns the functional variance calculated from the logpdf array `lp`.
"""
function functional_variance(lp::AbstractArray{R, 2}) where {R}
    n, L = size(lp)
    2sum(var(@view lp[k,:]) for k in 1:n)
end

"""
`waic(lp)` returns the WAIC, the training error, and the functional variance calculated from the logpdf array `lp`.
"""
function waic(lp::AbstractArray{R, 2}) where {R}
    T = training_error(lp)
    V = functional_variance(lp)
    WAIC = T + V
    (WAIC=WAIC, T=T, V=V)
end

"""
`waic(logpdf_func, data, posterior_sample)` returns the WAIC, the training error, and the functional variance calculated from the arguments.
"""
function waic(
        logpdf_func,
        data::AbstractArray,
        posterior_sample::AbstractArray)
    lp = logpdf_array(logpdf_func, data, posterior_sample)
    waic(lp)
end

"""
`waic(logpdf_func, data, chain, vars=get_vars(chain))` returns the WAIC, the training error, and the functional variance calculated from the arguments.

Example:

```julia
@model ols(X, Y) = begin
    a ~ Flat()
    b ~ Flat()
    σ ~ FlatPos(0.0)
    for k in eachindex(X)
        Y[k] ~ Normal(a + b*X[k], σ)
    end
end

n = 20
X = 3rand(n)
Y = X .+ 1 .+ 0.3randn(n)
chain_ols = psample(ols(X, Y), NUTS(), 2000, 3)

function logpdf_ols(x, w)
    X, Y = x
    a, b, σ = w
    logpdf(Normal(a + b*X, σ), Y)
end
data_ols = hcat(X, Y)
vars_ols = [:a, :b, :σ]

@show waic(logpdf_ols, data_ols, chain_ols, vars_ols)
@show chain_ols
plot(chain_ols)
```
"""
function waic(
        logpdf_func,
        data::AbstractArray,
        chain::Turing.Inference.AbstractMCMC.AbstractChains,
        vars::AbstractVector=get_vars(chain)
    )
    posterior_sample = get_posterior_sample(chain, vars)
    waic(logpdf_func, data, posterior_sample)
end

display(@doc logpdf_array)
display(@doc training_error)
display(@doc functional_variance)
display(@doc waic)

logpdf_array(logpdf_func, data, posterior_sample) returns the array lp definef by

lp[k, l] = logpdf_func(data[k, :], posterior_sample[l, :])

logpdf_array(logpdf_func, data, chain::AbstractChains, vars=get_vars(chain)) returns the array lp definef by

posterior_sample = get_posterior_sample(chain, vars)
lp[k, l] = logpdf_func(data[k, :], posterior_sample[l, :])

training_error(lp) returns the training error calculated from the logpdf array lp.

functional_variance(lp) returns the functional variance calculated from the logpdf array lp.

waic(lp) returns the WAIC, the training error, and the functional variance calculated from the logpdf array lp.

waic(logpdf_func, data, posterior_sample) returns the WAIC, the training error, and the functional variance calculated from the arguments.

waic(logpdf_func, data, chain, vars=get_vars(chain)) returns the WAIC, the training error, and the functional variance calculated from the arguments.

Example:

@model ols(X, Y) = begin
    a ~ Flat()
    b ~ Flat()
    σ ~ FlatPos(0.0)
    for k in eachindex(X)
        Y[k] ~ Normal(a + b*X[k], σ)
    end
end

n = 20
X = 3rand(n)
Y = X .+ 1 .+ 0.3randn(n)
chain_ols = psample(ols(X, Y), NUTS(), 2000, 3)

function logpdf_ols(x, w)
    X, Y = x
    a, b, σ = w
    logpdf(Normal(a + b*X, σ), Y)
end
data_ols = hcat(X, Y)
vars_ols = [:a, :b, :σ]

@show waic(logpdf_ols, data_ols, chain_ols, vars_ols)
@show chain_ols
plot(chain_ols)

LOOCVを計算する函数¶

さらに事前分布を $\varphi(w)$ と書くと, データ全体から $k$ 番目の $(X_k, Y_k)$ を除いてできるデータから得られる事後分布は

$$ \varphi^*_k(w) = \frac {\varphi(w)\prod_{i\ne k} p(Y_i|X_i, w)} {\int \varphi(w)\prod_{i\ne k} p(Y_i|X_i, w)\,dw} $$

と表わされる. これに関する平均を $E^k_w[\;]$ と書くことにする. このとき, 一個抜き出し交差検証(leave-one-out cross varidation, LOOCV)が次のように定義される:

$$ \operatorname{LOOCV} = -2\sum_{k=1}^n\log E^k_w[p(Y_k|X_k,w)] = -2\sum_{k=1}^n \int p(Y_k|X_k,w)\varphi^*_k(w)\,dw. $$

以上のLOOCVの定義を以下のように変形できる:

$$ \begin{aligned} \operatorname{LOOCV} &= -2\sum_{k=1}^n\log \frac {\int \varphi(w)p(Y_k|X_k,w)\prod_{i\ne k} p(Y_i|X_i, w)\,dw} {\int \varphi(w)\prod_{i\ne k} p(Y_i|X_i, w)\,dw} \\ &= -2\sum_{k=1}^n \log \frac {\int \varphi(w)\prod_{i=1}^n p(Y_i|X_i, w)\,dw} {\int \varphi(w)p(Y_k|X_k,w)^{-1}\prod_{i=1}^n p(Y_i|X_i, w)\,dw} \\ &= -2\sum_{k=1}^n \log \left( \frac {\int \varphi(w)p(Y_k|X_k,w)^{-1}\prod_{i=1}^n p(Y_i|X_i, w)\,dw} {\int \varphi(w)\prod_{i=1}^n p(Y_i|X_i, w)\,dw} \right)^{-1} \\ &= -2\sum_{k=1}^n \log E_w\left[p(Y_k|X_k,w)^{-1}\right]^{-1} \\ &= 2\sum_{k=1}^n \log E_w\left[p(Y_k|X_k,w)^{-1}\right] \\ &= 2\sum_{k=1}^n \log E_w[\exp(-\log p(Y_k|X_k,w))]. \end{aligned} $$

最初の等号で $\varphi^*_k(w)$ の定義を使った. 4つ目の等号で,

$$ E_w[f(w)] = \int f(w)\varphi^*(w)\,dw, \quad \varphi^*(w) = \frac {\varphi(w)\prod_{i=1}^n p(Y_i|X_i, w)} {\int \varphi(w)\prod_{i=1}^n p(Y_i|X_i, w)\,dw} $$

を使い, 5つ目の等号で $\log x^{-1}=-\log x$ を使った.

上の変形の最後の式を使えば前節の記号のもとでLOOCVを次のように近似計算可能である:

$$ \operatorname{LOOCV} \approx 2\sum_{k=1}^n \log\,\operatorname{mean}\,\{\,\exp(-\log\operatorname{lp}_{k,l})\mid l=1,\ldots,L\,\}. $$

In [4]:

"""
`loocv(lp)` returns the LOOCV from the logpdf array `lp`.
"""
function loocv(lp::AbstractArray{R, 2}) where {R}
    n, L = size(lp)
    2sum(logmeanexp(- @view lp[k, :]) for k in 1:n)
end

"""
`loocv(logpdf_func, data, posterior_sample)` returns the LOOCV calculated from the arguments.
"""
function loocv(
        logpdf_func,
        data::AbstractArray,
        posterior_sample::AbstractArray)
    lp = logpdf_array(logpdf_func, data, posterior_sample)
    loocv(lp)
end

"""
`loocv(logpdf_func, data, chain, vars=get_vars(chain))` returns the LOOCV calculated from the arguments.
"""
function loocv(
        logpdf_func,
        data::AbstractArray,
        chain::Turing.Inference.AbstractMCMC.AbstractChains,
        vars::AbstractVector=get_vars(chain)
    )
    posterior_sample = get_posterior_sample(chain, vars)
    loocv(logpdf_func, data, posterior_sample)
end

@doc loocv

Out[4]:

loocv(lp) returns the LOOCV from the logpdf array lp.

loocv(logpdf_func, data, posterior_sample) returns the LOOCV calculated from the arguments.

loocv(logpdf_func, data, chain, vars=get_vars(chain)) returns the LOOCV calculated from the arguments.

予測分布のpdfを作る函数¶

In [5]:

"""
`make_pred_pdf(logpdf_func, posterior_sample)` makes the pdf of the predictive distribution from the arguments.
"""
function make_pred_pdf(logpdf_func, posterior_sample::AbstractArray)
    L = size(posterior_sample, 1)
    pred_pdf(x) = mean(exp(logpdf_func(x, @view(posterior_sample[l, :]))) for l in 1:L)
    pred_pdf
end

"""
`make_pred_pdf(logpdf_func, chain::AbstractChains, vars::AbstractVector)` makes the pdf of the predictive distribution from the arguments.
"""
function make_pred_pdf(
        logpdf_func,
        chain::Turing.Inference.AbstractMCMC.AbstractChains,
        vars::AbstractVector=get_vars(chain)
    )
    posterior_sample = get_posterior_sample(chain, vars)
    make_pred_pdf(logpdf_func, posterior_sample)
end

@doc make_pred_pdf

Out[5]:

make_pred_pdf(logpdf_func, posterior_sample) makes the pdf of the predictive distribution from the arguments.

make_pred_pdf(logpdf_func, chain::AbstractChains, vars::AbstractVector) makes the pdf of the predictive distribution from the arguments.

In [6]:

seed!(4649)
posterior_sample_test = [5, 0] .+ 1.2randn(2, 100)
logpdf_test(x, w) = logpdf(Gamma(exp(w[1]), exp(w[2])), x[1])
pred_pdf_test = make_pred_pdf(logpdf_test, posterior_sample_test)
@show pred_pdf_test([5.0])
@show pred_pdf_test(5.0)

plot(size=(400, 250))
plot!(pred_pdf_test, 0, 50; label="pred_pdf_test")

pred_pdf_test([5.0]) = 0.01698634409454517
pred_pdf_test(5.0) = 0.01698634409454517

Out[6]:

ベイズ版最小二乗法¶

In [7]:

seed!(20200323);

In [8]:

@model ols(X, Y) = begin
    a ~ Flat()
    b ~ Flat()
    σ ~ FlatPos(0.0)
    for k in eachindex(X)
        Y[k] ~ Normal(a + b*X[k], σ)
    end
end

n = 20
X = 3rand(n)
Y = X .+ 1 .+ 0.3randn(n)
chain_ols = psample(ols(X, Y), NUTS(), 2000, 3)

function logpdf_ols(x, w)
    X, Y = x
    a, b, σ = w
    logpdf(Normal(a + b*X, σ), Y)
end
data_ols = hcat(X, Y)
vars_ols = [:a, :b, :σ]
waic_ols = waic(logpdf_ols, data_ols, chain_ols, vars_ols)
loocv_ols = loocv(logpdf_ols, data_ols, chain_ols, vars_ols)

@show waic_ols
@show loocv_ols
@show chain_ols
plot(chain_ols; lw=0.5)

┌ Info: Found initial step size
│   ϵ = 0.05
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:08

waic_ols = (WAIC = 18.849114851565552, T = 14.08884910101609, V = 4.760265750549462)
loocv_ols = 19.01539110495765
chain_ols = Object of type Chains, with data of type 1000×15×3 Array{Real,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
internals         = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth
parameters        = a, b, σ

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters    mean     std  naive_se    mcse        ess   r_hat
  ──────────  ──────  ──────  ────────  ──────  ─────────  ──────
           a  1.0951  0.1650    0.0030  0.0054   989.8906  1.0017
           b  0.8655  0.1135    0.0021  0.0033  1102.7093  1.0009
           σ  0.3785  0.0680    0.0012  0.0015  1534.5884  0.9995

Quantiles
  parameters    2.5%   25.0%   50.0%   75.0%   97.5%
  ──────────  ──────  ──────  ──────  ──────  ──────
           a  0.7706  0.9880  1.0940  1.2039  1.4164
           b  0.6406  0.7917  0.8659  0.9376  1.0872
           σ  0.2752  0.3302  0.3691  0.4158  0.5289

Out[8]:

In [9]:

posterior_sample_ols = get_posterior_sample(chain_ols, vars_ols)
a, b, σ = mapslices(median, posterior_sample_ols, dims=1)
@show a, b, σ
lp = logpdf_array(logpdf_ols, data_ols, posterior_sample_ols)
@show T = training_error(lp)
@show V = functional_variance(lp)
@show WAIC = T + V
@show LOOCV = loocv(lp)
relax()
display(waic(lp))

X, Y = data_ols[:,1], data_ols[:,2]
Phi = hcat(ones(length(X)), X)
a, b = Phi\Y
σ² = mean((Y - Phi*[a,b]).^2)
@show a, b, √σ²
T = -2sum(logpdf(Normal(Phi[k,:]'*[a,b], √σ²), Y[k]) for k in 1:length(X))
nparams = 3
@show T
@show 2nparams
@show AIC = T + 2nparams
relax()

(AIC=AIC, T=T, V=2nparams) |> display

(a, b, σ) = (1.0939559289468321, 0.8659305919345646, 0.3690821664936669)
T = training_error(lp) = 14.08884910101609
V = functional_variance(lp) = 4.760265750549462
WAIC = T + V = 18.849114851565552
LOOCV = loocv(lp) = 19.01539110495765

(WAIC = 18.849114851565552, T = 14.08884910101609, V = 4.760265750549462)

(a, b, √σ²) = (1.0858106841809112, 0.8718885100467602, 0.33313315445256647)
T = 12.78902109999629
2nparams = 6
AIC = T + 2nparams = 18.78902109999629

(AIC = 18.78902109999629, T = 12.78902109999629, V = 6)

In [10]:

plot(size=(400, 250))
scatter!(X, Y; label="sample")
plot!(x -> x + 1, -0.2, 3.2; label="x+1")
plot!(x -> x + 1 + 2*0.3, -0.2, 3.2; label="±2σ", color=:black, ls=:dot)
plot!(x -> x + 1 - 2*0.3, -0.2, 3.2; label="", color=:black, ls=:dot)

Out[10]:

In [11]:

pred_pdf_ols = make_pred_pdf(logpdf_ols, posterior_sample_ols)

x = range(-0.5, 3.5; length=100)
y = range(0.5, 4.5; length=100)
@time z = ((x, y) -> pred_pdf_ols((x, y))).(x', y)
P1 = plot(size=(400, 400), legend=false)
plot!(xtick=-10:10, ytick=-10:10)
plot!(xlim=extrema(x), ylim=extrema(y))
heatmap!(x, y, z)
scatter!(X, Y; label="sample", color=:cyan)

x = range(-7, 10; length=100)
y = range(-6, 11; length=100)
@time z = ((x, y) -> pred_pdf_ols((x, y))).(x', y)
P2 = plot(size=(400, 400), legend=false)
plot!(xtick=-10:2:10, ytick=-10:2:10)
plot!(xlim=extrema(x), ylim=extrema(y))
heatmap!(x, y, z)
scatter!(X, Y; label="sample", color=:cyan)

plot(P1, P2; size=(800, 400), layout=(1, 2))

  3.462194 seconds (30.62 M allocations: 1.371 GiB, 12.37% gc time)
  3.145276 seconds (30.26 M allocations: 1.352 GiB, 12.28% gc time)

Out[11]:

複数の確率モデルの比較¶

In [12]:

normal_dist(a, b) = Normal(a, exp(b))
logpdf_normal(x, w) = logpdf(normal_dist(w[1], w[2]), x[1])
@doc Normal

Out[12]:

Normal(μ,σ)

The Normal distribution with mean μ and standard deviation σ≥0 has probability density function

$$ f(x; \mu, \sigma) = \frac{1}{\sqrt{2 \pi \sigma^2}} \exp \left( - \frac{(x - \mu)^2}{2 \sigma^2} \right) $$

Note that if σ == 0, then the distribution is a point mass concentrated at μ. Though not technically a continuous distribution, it is allowed so as to account for cases where σ may have underflowed, and the functions are defined by taking the pointwise limit as $σ → 0$.

Normal()          # standard Normal distribution with zero mean and unit variance
Normal(mu)        # Normal distribution with mean mu and unit variance
Normal(mu, sig)   # Normal distribution with mean mu and variance sig^2

params(d)         # Get the parameters, i.e. (mu, sig)
mean(d)           # Get the mean, i.e. mu
std(d)            # Get the standard deviation, i.e. sig

External links

Normal distribution on Wikipedia

In [13]:

gamma_dist(a, b) = Gamma(exp(a), exp(b))
logpdf_gamma(x, w) = logpdf(gamma_dist(w[1], w[2]), x[1])
@doc Gamma

Out[13]:

Gamma(α,θ)

The Gamma distribution with shape parameter α and scale θ has probability density function

$$ f(x; \alpha, \theta) = \frac{x^{\alpha-1} e^{-x/\theta}}{\Gamma(\alpha) \theta^\alpha}, \quad x > 0 $$

Gamma()          # Gamma distribution with unit shape and unit scale, i.e. Gamma(1, 1)
Gamma(α)         # Gamma distribution with shape α and unit scale, i.e. Gamma(α, 1)
Gamma(α, θ)      # Gamma distribution with shape α and scale θ

params(d)        # Get the parameters, i.e. (α, θ)
shape(d)         # Get the shape parameter, i.e. α
scale(d)         # Get the scale parameter, i.e. θ

External links

Gamma distribution on Wikipedia

In [14]:

inversegamma_dist(a, b) = InverseGamma(exp(a), exp(b))
logpdf_inversegamma(x, w) = logpdf(inversegamma_dist(w[1], w[2]), x[1])
@doc InverseGamma

Out[14]:

InverseGamma(α, θ)

The inverse Gamma distribution with shape parameter α and scale θ has probability density function

$$ f(x; \alpha, \theta) = \frac{\theta^\alpha x^{-(\alpha + 1)}}{\Gamma(\alpha)} e^{-\frac{\theta}{x}}, \quad x > 0 $$

It is related to the Gamma distribution: if $X \sim \operatorname{Gamma}(\alpha, \beta)$, then $1 / X \sim \operatorname{InverseGamma}(\alpha, \beta^{-1})$.

InverseGamma()        # Inverse Gamma distribution with unit shape and unit scale, i.e. InverseGamma(1, 1)
InverseGamma(α)       # Inverse Gamma distribution with shape α and unit scale, i.e. InverseGamma(α, 1)
InverseGamma(α, θ)    # Inverse Gamma distribution with shape α and scale θ

params(d)        # Get the parameters, i.e. (α, θ)
shape(d)         # Get the shape parameter, i.e. α
scale(d)         # Get the scale parameter, i.e. θ

External links

Inverse gamma distribution on Wikipedia

In [15]:

lognormal_dist(a, b) = LogNormal(a, exp(b))
logpdf_lognormal(x, w) = logpdf(lognormal_dist(w[1], w[2]), x[1])
@doc LogNormal

Out[15]:

LogNormal(μ,σ)

The log normal distribution is the distribution of the exponential of a Normal variate: if $X \sim \operatorname{Normal}(\mu, \sigma)$ then $\exp(X) \sim \operatorname{LogNormal}(\mu,\sigma)$. The probability density function is

$$ f(x; \mu, \sigma) = \frac{1}{x \sqrt{2 \pi \sigma^2}} \exp \left( - \frac{(\log(x) - \mu)^2}{2 \sigma^2} \right), \quad x > 0 $$

LogNormal()          # Log-normal distribution with zero log-mean and unit scale
LogNormal(mu)        # Log-normal distribution with log-mean mu and unit scale
LogNormal(mu, sig)   # Log-normal distribution with log-mean mu and scale sig

params(d)            # Get the parameters, i.e. (mu, sig)
meanlogx(d)          # Get the mean of log(X), i.e. mu
varlogx(d)           # Get the variance of log(X), i.e. sig^2
stdlogx(d)           # Get the standard deviation of log(X), i.e. sig

External links

Log normal distribution on Wikipedia

In [16]:

weibull_dist(a, b) = Weibull(exp(a), exp(b))
logpdf_weibull(x, w) = logpdf(weibull_dist(w[1], w[2]), x[1])
@doc Weibull

Out[16]:

Weibull(α,θ)

The Weibull distribution with shape α and scale θ has probability density function

$$ f(x; \alpha, \theta) = \frac{\alpha}{\theta} \left( \frac{x}{\theta} \right)^{\alpha-1} e^{-(x/\theta)^\alpha}, \quad x \ge 0 $$

Weibull()        # Weibull distribution with unit shape and unit scale, i.e. Weibull(1, 1)
Weibull(a)       # Weibull distribution with shape a and unit scale, i.e. Weibull(a, 1)
Weibull(a, b)    # Weibull distribution with shape a and scale b

params(d)        # Get the parameters, i.e. (a, b)
shape(d)         # Get the shape parameter, i.e. a
scale(d)         # Get the scale parameter, i.e. b

External links

Weibull distribution on Wikipedia

In [17]:

@show dist_true = weibull_dist(1.5, 1/10)

seed!(4649)
n = 1000
X = rand(dist_true, n)
plot(size=(500, 350), legendfontsize=10)
histogram!(X; norm=true, alpha=0.3, label="sample")
plot!(dist_true, 0, 2; label="dist_true")

dist_true = weibull_dist(1.5, 1 / 10) = Weibull{Float64}(α=4.4816890703380645, θ=1.1051709180756477)

Out[17]:

In [18]:

@model testmodel(X, dist_func) = begin
    a ~ Flat()
    b ~ Flat()
    X .~ dist_func(a, b)
end

Out[18]:

##testmodel#430 (generic function with 2 methods)

In [19]:

chain_normal = psample(testmodel(X, normal_dist), NUTS(), 2000, 3)
waic_normal = waic(logpdf_normal, X, chain_normal)
@show waic_normal
loocv_normal = loocv(logpdf_normal, X, chain_normal)
@show loocv_normal
println()
@show chain_normal
plot(chain_normal; lw=0.5)

┌ Info: Found initial step size
│   ϵ = 0.05
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:05

waic_normal = (WAIC = 143.7520736219336, T = 139.9459413074622, V = 3.8061323144714003)
loocv_normal = 143.75090489485888

chain_normal = Object of type Chains, with data of type 1000×14×3 Array{Real,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
internals         = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth
parameters        = a, b

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters     mean     std  naive_se    mcse        ess   r_hat
  ──────────  ───────  ──────  ────────  ──────  ─────────  ──────
           a   1.0024  0.0082    0.0001  0.0001  3076.7545  1.0005
           b  -1.3479  0.0220    0.0004  0.0004  3105.4581  0.9995

Quantiles
  parameters     2.5%    25.0%    50.0%    75.0%    97.5%
  ──────────  ───────  ───────  ───────  ───────  ───────
           a   0.9862   0.9969   1.0023   1.0078   1.0196
           b  -1.3910  -1.3631  -1.3472  -1.3328  -1.3052

Out[19]:

In [20]:

chain_gamma = psample(testmodel(X, gamma_dist), NUTS(), 2000, 3)
waic_gamma = waic(logpdf_gamma, X, chain_gamma)
@show waic_gamma
loocv_gamma = loocv(logpdf_gamma, X, chain_gamma)
@show loocv_gamma
println()
@show chain_gamma
plot(chain_gamma; lw=0.5)

┌ Info: Found initial step size
│   ϵ = 0.025
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:14

waic_gamma = (WAIC = 242.55619795660647, T = 237.50715503275975, V = 5.049042923846727)
loocv_gamma = 242.55334597773796

chain_gamma = Object of type Chains, with data of type 1000×14×3 Array{Real,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
internals         = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth
parameters        = a, b

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters     mean     std  naive_se    mcse       ess   r_hat
  ──────────  ───────  ──────  ────────  ──────  ────────  ──────
           a   2.5501  0.0433    0.0008  0.0018  714.8608  1.0022
           b  -2.5479  0.0443    0.0008  0.0019  715.1871  1.0024

Quantiles
  parameters     2.5%    25.0%    50.0%    75.0%    97.5%
  ──────────  ───────  ───────  ───────  ───────  ───────
           a   2.4626   2.5220   2.5504   2.5789   2.6387
           b  -2.6377  -2.5777  -2.5478  -2.5187  -2.4582

Out[20]:

In [21]:

chain_inversegamma = psample(testmodel(X, inversegamma_dist), NUTS(), 2000, 3)
waic_inversegamma = waic(logpdf_inversegamma, X, chain_inversegamma)
@show waic_inversegamma
loocv_inversegamma = loocv(logpdf_inversegamma, X, chain_inversegamma)
@show loocv_inversegamma
println()
@show chain_inversegamma
plot(chain_inversegamma; lw=0.5)

┌ Info: Found initial step size
│   ϵ = 0.025
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:10

waic_inversegamma = (WAIC = 501.0669809819272, T = 491.5322296725455, V = 9.534751309381688)
loocv_inversegamma = 501.0250547819082

chain_inversegamma = Object of type Chains, with data of type 1000×14×3 Array{Real,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
internals         = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth
parameters        = a, b

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters    mean     std  naive_se    mcse       ess   r_hat
  ──────────  ──────  ──────  ────────  ──────  ────────  ──────
           a  2.3042  0.0427    0.0008  0.0016  740.5161  1.0009
           b  2.2160  0.0435    0.0008  0.0016  748.1955  1.0011

Quantiles
  parameters    2.5%   25.0%   50.0%   75.0%   97.5%
  ──────────  ──────  ──────  ──────  ──────  ──────
           a  2.2236  2.2742  2.3035  2.3347  2.3868
           b  2.1303  2.1863  2.2144  2.2479  2.2996

Out[21]:

In [22]:

chain_lognormal = psample(testmodel(X, lognormal_dist), NUTS(), 2000, 3)
waic_lognormal = waic(logpdf_lognormal, X, chain_lognormal)
@show waic_lognormal
loocv_lognormal = loocv(logpdf_lognormal, X, chain_lognormal)
@show loocv_lognormal
println()
@show chain_lognormal
plot(chain_lognormal; lw=0.5)

┌ Info: Found initial step size
│   ϵ = 0.025
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:03

waic_lognormal = (WAIC = 343.79099285707605, T = 336.58010836584594, V = 7.2108844912300984)
loocv_lognormal = 343.78791583887903

chain_lognormal = Object of type Chains, with data of type 1000×14×3 Array{Real,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
internals         = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth
parameters        = a, b

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters     mean     std  naive_se    mcse        ess   r_hat
  ──────────  ───────  ──────  ────────  ──────  ─────────  ──────
           a  -0.0372  0.0094    0.0002  0.0002  3244.1229  0.9999
           b  -1.2116  0.0231    0.0004  0.0004  3111.7774  0.9992

Quantiles
  parameters     2.5%    25.0%    50.0%    75.0%    97.5%
  ──────────  ───────  ───────  ───────  ───────  ───────
           a  -0.0550  -0.0435  -0.0371  -0.0307  -0.0195
           b  -1.2558  -1.2275  -1.2116  -1.1961  -1.1667

Out[22]:

In [23]:

chain_weibull = psample(testmodel(X, weibull_dist), NUTS(), 2000, 3)
waic_weibull = waic(logpdf_weibull, X, chain_weibull)
@show waic_weibull
loocv_weibull = loocv(logpdf_weibull, X, chain_weibull)
@show loocv_weibull
println()
@show chain_weibull
plot(chain_weibull; lw=0.5)

┌ Info: Found initial step size
│   ϵ = 0.05
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC C:\Users\genkuroki\.julia\packages\AdvancedHMC\haUrH\src\hamiltonian.jl:44
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, true, true, false)
└ @ AdvancedHMC C:\Users\genkuroki\.julia\packages\AdvancedHMC\haUrH\src\hamiltonian.jl:44
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:07

waic_weibull = (WAIC = 135.8190696247778, T = 131.69550370400378, V = 4.123565920773999)
loocv_weibull = 135.81696106444352

chain_weibull = Object of type Chains, with data of type 1000×14×3 Array{Real,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
internals         = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth
parameters        = a, b

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters    mean     std  naive_se    mcse        ess   r_hat
  ──────────  ──────  ──────  ────────  ──────  ─────────  ──────
           a  1.4748  0.0252    0.0005  0.0006  2326.7643  0.9993
           b  0.0951  0.0076    0.0001  0.0002  2044.4410  0.9997

Quantiles
  parameters    2.5%   25.0%   50.0%   75.0%   97.5%
  ──────────  ──────  ──────  ──────  ──────  ──────
           a  1.4247  1.4576  1.4744  1.4924  1.5230
           b  0.0799  0.0899  0.0953  0.1002  0.1094

Out[23]:

In [24]:

@show waic_normal |> mrd
@show waic_gamma |> mrd
@show waic_inversegamma |> mrd
@show waic_lognormal |> mrd
@show waic_weibull |> mrd
println()
@show loocv_normal |> mrd
@show loocv_gamma |> mrd
@show loocv_inversegamma |> mrd
@show loocv_lognormal |> mrd
@show loocv_weibull |> mrd
;

waic_normal |> mrd = (WAIC = 143.752, T = 139.946, V = 3.806)
waic_gamma |> mrd = (WAIC = 242.556, T = 237.507, V = 5.049)
waic_inversegamma |> mrd = (WAIC = 501.067, T = 491.532, V = 9.535)
waic_lognormal |> mrd = (WAIC = 343.791, T = 336.58, V = 7.211)
waic_weibull |> mrd = (WAIC = 135.819, T = 131.696, V = 4.124)

loocv_normal |> mrd = 143.751
loocv_gamma |> mrd = 242.553
loocv_inversegamma |> mrd = 501.025
loocv_lognormal |> mrd = 343.788
loocv_weibull |> mrd = 135.817

In [25]:

pred_normal = make_pred_pdf(logpdf_normal, chain_normal)
pred_gamma = make_pred_pdf(logpdf_gamma, chain_gamma)
pred_inversegamma = make_pred_pdf(logpdf_inversegamma, chain_inversegamma)
pred_lognormal = make_pred_pdf(logpdf_lognormal, chain_lognormal)
pred_weibull = make_pred_pdf(logpdf_weibull, chain_weibull)

plot(legendfontsize=10)
histogram!(X; norm=true, alpha=0.2, label="sample")
plot!(x -> pred_weibull([x]), 0, 2.5; label="weibull", ls=:dash, lw=1.5)
plot!(x -> pred_normal([x]), 0, 2.5; label="normal", ls=:dashdot, lw=1.2)
plot!(x -> pred_gamma([x]), 0, 2.5; label="gamma", ls=:dot, lw=1.2)
plot!(x -> pred_inversegamma([x]), 0, 2.5; label="inversegamma", ls=:dot, lw=1.2)
plot!(x -> pred_lognormal([x]), 0, 2.5; label="lognormal", ls=:dot, lw=1.2)
plot!(dist_true, 0, 2.5; label="dist_true", color=:blue, alpha=0.7, lw=0.8)

Out[25]:

COVID-19 の潜伏期間の推定¶

http://mikuhatsune.hatenadiary.com/entry/2020/03/19/231812

In [26]:

tStartExposure = [-2, -18, -18, 10, 3, 8, 12, 13, -18, -18, 8, 10, -11, -18, -18, -18, 12, -18, -18, -18, -18, -18, -18, 11, 11, -18, -18, -18, -18, -18, 12, 12, 15, 15, -18, -18, -18, -18, 19, -18, -18, -18, -18, -18, -18, -18, 18, -18, -18, 6, -18, -18, -18, 9, -18, 11, -18, -18, -18, -18, -18, -18, -18, -18, -18, -18, 19, -18, -18, -18, -18, -18, -18, -18, 13, 13, -18, -18, -18, -18, -18, -18, 13, -18, -18, -18, -18, -18]

tEndExposure = [3, 12, 3, 11, 4, 16, 16, 16, 15, 15, 16, 11, 9, 2, 12, 17, 15, 17, 18, 13, 16, 11, 18, 14, 18, 20, 12, 10, 17, 15, 15, 14, 17, 20, 18, 13, 23, 23, 22, 20, 21, 21, 21, 15, 21, 17, 20, 18, 20, 7, 20, 20, 20, 20, 18, 22, 22, 18, 18, 18, 9, 20, 18, 22, 23, 19, 19, 22, 22, 13, 22, 22, 23, 23, 17, 17, 22, 18, 18, 22, 18, 20, 15, 20, 19, 23, 22, 22]

tSymptomOnset = [3, 15, 4, 14, 9, 16, 16, 16, 16, 16, 16, 14, 10, 10, 14, 20, 19, 19, 20, 20, 17, 13, 19, 19, 20, 21, 18, 18, 18, 16, 20, 16, 20, 20, 19, 17, 24, 24, 23, 21, 23, 23, 23, 21, 22, 24, 24, 19, 21, 14, 23, 23, 21, 20, 21, 22, 23, 19, 23, 21, 13, 22, 24, 25, 25, 25, 25, 24, 24, 21, 23, 23, 24, 25, 18, 18, 23, 22, 22, 24, 24, 25, 22, 25, 25, 24, 25, 25]

@show d = length(tStartExposure)

hcat(tStartExposure, tEndExposure, tSymptomOnset)'

d = length(tStartExposure) = 88

Out[26]:

3×88 Adjoint{Int64,Array{Int64,2}}:
 -2  -18  -18  10  3   8  12  13  -18  …  -18  13  -18  -18  -18  -18  -18
  3   12    3  11  4  16  16  16   15      20  15   20   19   23   22   22
  3   15    4  14  9  16  16  16   16      25  22   25   25   24   25   25

"n=1" の推定とみなしたときのWAICとLOOCV¶

In [27]:

data_ip = hcat(tStartExposure', tEndExposure', tSymptomOnset')
size(data_ip)

Out[27]:

(1, 264)

In [28]:

@model incubation_period(
    dist_func,
    tStartExposure,
    tEndExposure,
    tSymptomOnset,
    ::Type{VT}=Vector{Float64}
) where {VT} = begin
    n = length(tStartExposure)
    uE = VT(undef, n)
    a ~ Flat()
    b ~ Flat()
    uE .~ Uniform()
    for k in 1:n
        tE = tStartExposure[k] + uE[k]*(tEndExposure[k] - tStartExposure[k])
        tSymptomOnset[k] ~ LocationScale(tE, 1.0, dist_func(a, b))
    end
end

model_ip(dist_func) = incubation_period(
    dist_func,
    tStartExposure,
    tEndExposure,
    tSymptomOnset
)

function logpdf_ip(x, w; dist_func=dist_func, d=length(tStartExposure))
    a = w[1]
    b = w[2]
    uE = @view w[3:end]
    sum(logpdf(dist_func(a, b), x[2d+k] - (x[k] + uE[k]*(x[d+k]-x[k]))) for k in 1:d)
end

Out[28]:

logpdf_ip (generic function with 1 method)

In [29]:

L = 1000 + 1000
nchains = 3

Out[29]:

In [30]:

@time chain_ip_gamma = psample(model_ip(gamma_dist), NUTS(), L, nchains);

┌ Info: Found initial step size
│   ϵ = 0.05
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC C:\Users\genkuroki\.julia\packages\AdvancedHMC\haUrH\src\hamiltonian.jl:44
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:10:44

650.187142 seconds (6.56 G allocations: 1.728 TiB, 33.46% gc time)

In [31]:

#chain_ip_gamma
chain_ip_gamma[["a"; "b"; ["uE[$i]" for i in 1:10:d]]]

Out[31]:

Object of type Chains, with data of type 1000×11×3 Array{Real,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
parameters        = a, b, uE[1], uE[11], uE[21], uE[31], uE[41], uE[51], uE[61], uE[71], uE[81]

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters    mean     std  naive_se    mcse        ess   r_hat
  ──────────  ──────  ──────  ────────  ──────  ─────────  ──────
           a  1.7228  0.3096    0.0057  0.0117   490.9173  1.0034
           b  0.1612  0.3609    0.0066  0.0140   498.1078  1.0027
       uE[1]  0.2569  0.1795    0.0033  0.0029  4105.3946  1.0000
      uE[11]  0.3464  0.2030    0.0037  0.0030  3747.1291  0.9998
      uE[21]  0.8373  0.0864    0.0016  0.0018  3512.9622  0.9997
      uE[31]  0.5359  0.2846    0.0052  0.0034  4880.1431  0.9999
      uE[41]  0.8808  0.0750    0.0014  0.0013  4165.0778  0.9994
      uE[51]  0.8979  0.0722    0.0013  0.0013  3986.7499  0.9997
      uE[61]  0.8746  0.1018    0.0019  0.0021  3624.6169  0.9993
      uE[71]  0.8592  0.0737    0.0013  0.0010  3907.3316  0.9998
      uE[81]  0.9254  0.0665    0.0012  0.0012  3450.9053  1.0000

Quantiles
  parameters     2.5%    25.0%   50.0%   75.0%   97.5%
  ──────────  ───────  ───────  ──────  ──────  ──────
           a   1.0807   1.5080  1.7376  1.9358  2.3066
           b  -0.4899  -0.0826  0.1401  0.3950  0.9330
       uE[1]   0.0130   0.1074  0.2235  0.3778  0.6638
      uE[11]   0.0238   0.1792  0.3326  0.4955  0.7566
      uE[21]   0.6356   0.7891  0.8507  0.9001  0.9669
      uE[31]   0.0328   0.3000  0.5520  0.7807  0.9773
      uE[41]   0.7012   0.8427  0.8936  0.9337  0.9867
      uE[51]   0.7201   0.8580  0.9115  0.9515  0.9941
      uE[61]   0.6048   0.8271  0.8977  0.9481  0.9948
      uE[71]   0.6807   0.8206  0.8703  0.9115  0.9691
      uE[81]   0.7583   0.8957  0.9429  0.9750  0.9976

In [32]:

#@time plot(chain_ip_gamma[2:2:end]; lw=0.5, alpha=0.8)
@time plot(chain_ip_gamma[["a"; "b"; ["uE[$i]" for i in 1:10:d]]][2:2:end]; lw=0.5, alpha=0.8)

  0.974425 seconds (1.76 M allocations: 81.750 MiB)

Out[32]:

In [33]:

dist_func = gamma_dist
waic_ip_gamma = waic(logpdf_ip, data_ip, chain_ip_gamma)
loocv_ip_gamma = loocv(logpdf_ip, data_ip, chain_ip_gamma)
@show waic_ip_gamma
@show loocv_ip_gamma;

waic_ip_gamma = (WAIC = 828.0910756394273, T = 345.5145214862969, V = 482.5765541531305)
loocv_ip_gamma = 521.7608033574322

In [34]:

@time chain_ip_lognormal = psample(model_ip(lognormal_dist), NUTS(), L, nchains);

┌ Info: Found initial step size
│   ϵ = 0.2
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:06:53

417.389136 seconds (3.77 G allocations: 1016.697 GiB, 36.79% gc time)

In [35]:

#chain_ip_lognormal
chain_ip_lognormal[["a"; "b"; ["uE[$i]" for i in 1:10:d]]]

Out[35]:

Object of type Chains, with data of type 1000×11×3 Array{Real,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
parameters        = a, b, uE[1], uE[11], uE[21], uE[31], uE[41], uE[51], uE[61], uE[71], uE[81]

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters     mean     std  naive_se    mcse        ess   r_hat
  ──────────  ───────  ──────  ────────  ──────  ─────────  ──────
           a   1.8033  0.0897    0.0016  0.0024  1689.2387  1.0019
           b  -0.7701  0.1789    0.0033  0.0082   539.6518  1.0031
       uE[1]   0.2456  0.1710    0.0031  0.0031  3976.5064  0.9994
      uE[11]   0.3554  0.1909    0.0035  0.0039  2606.2328  1.0009
      uE[21]   0.8279  0.1032    0.0019  0.0022  3090.9849  1.0022
      uE[31]   0.5479  0.2871    0.0052  0.0042  4505.0488  0.9994
      uE[41]   0.8718  0.0974    0.0018  0.0014  3635.0052  0.9991
      uE[51]   0.8880  0.0924    0.0017  0.0016  3292.2488  0.9998
      uE[61]   0.8651  0.1258    0.0023  0.0023  3474.5801  0.9996
      uE[71]   0.8528  0.0912    0.0017  0.0016  3408.6556  0.9994
      uE[81]   0.9088  0.0938    0.0017  0.0016  3146.7634  0.9996

Quantiles
  parameters     2.5%    25.0%    50.0%    75.0%    97.5%
  ──────────  ───────  ───────  ───────  ───────  ───────
           a   1.6367   1.7426   1.8006   1.8605   1.9913
           b  -1.1426  -0.8925  -0.7687  -0.6450  -0.4267
       uE[1]   0.0090   0.1036   0.2204   0.3621   0.6256
      uE[11]   0.0239   0.2029   0.3555   0.4950   0.7188
      uE[21]   0.5610   0.7859   0.8483   0.8977   0.9560
      uE[31]   0.0279   0.3089   0.5691   0.8002   0.9808
      uE[41]   0.6296   0.8335   0.8944   0.9357   0.9893
      uE[51]   0.6458   0.8529   0.9111   0.9519   0.9920
      uE[61]   0.5270   0.8214   0.8996   0.9516   0.9958
      uE[71]   0.6101   0.8162   0.8735   0.9139   0.9684
      uE[81]   0.6534   0.8770   0.9371   0.9718   0.9981

In [36]:

#@time plot(chain_ip_lognormal[2:2:end]; lw=0.5, alpha=0.8)
@time plot(chain_ip_lognormal[["a"; "b"; ["uE[$i]" for i in 1:10:d]]][2:2:end]; lw=0.5, alpha=0.8)

  0.191283 seconds (498.92 k allocations: 19.126 MiB)

Out[36]:

In [37]:

dist_func = lognormal_dist
waic_ip_lognormal = waic(logpdf_ip, data_ip, chain_ip_lognormal)
loocv_ip_lognormal = loocv(logpdf_ip, data_ip, chain_ip_lognormal)
@show waic_ip_lognormal
@show loocv_ip_lognormal;

waic_ip_lognormal = (WAIC = 972.3905600179592, T = 340.6357318256695, V = 631.7548281922897)
loocv_ip_lognormal = 538.6737465602005

In [38]:

@time chain_ip_weibull = psample(model_ip(weibull_dist), NUTS(), L, nchains);

┌ Info: Found initial step size
│   ϵ = 0.003125
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\JedVU\src\inference\hmc.jl:553
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC C:\Users\genkuroki\.julia\packages\AdvancedHMC\haUrH\src\hamiltonian.jl:44
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:07:10

434.839251 seconds (4.23 G allocations: 1.113 TiB, 32.28% gc time)

In [39]:

#chain_ip_weibull
chain_ip_weibull[["a"; "b"; ["uE[$i]" for i in 1:10:d]]]

Out[39]:

Object of type Chains, with data of type 1000×11×3 Array{Real,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
parameters        = a, b, uE[1], uE[11], uE[21], uE[31], uE[41], uE[51], uE[61], uE[71], uE[81]

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters    mean     std  naive_se    mcse        ess   r_hat
  ──────────  ──────  ──────  ────────  ──────  ─────────  ──────
           a  1.0667  0.1770    0.0032  0.0057   586.4088  1.0029
           b  1.9737  0.0795    0.0015  0.0026  1240.8634  1.0018
       uE[1]  0.2730  0.2042    0.0037  0.0032  4263.9176  0.9992
      uE[11]  0.3301  0.2105    0.0038  0.0029  4085.7993  0.9992
      uE[21]  0.8386  0.0723    0.0013  0.0012  3432.4125  0.9995
      uE[31]  0.5108  0.2805    0.0051  0.0041  4814.8101  0.9992
      uE[41]  0.8814  0.0620    0.0011  0.0012  3859.4796  0.9997
      uE[51]  0.8993  0.0616    0.0011  0.0010  3319.3782  1.0010
      uE[61]  0.8838  0.0797    0.0015  0.0014  3076.8869  0.9998
      uE[71]  0.8619  0.0624    0.0011  0.0011  3116.6802  1.0003
      uE[81]  0.9376  0.0477    0.0009  0.0009  3274.0768  1.0006

Quantiles
  parameters    2.5%   25.0%   50.0%   75.0%   97.5%
  ──────────  ──────  ──────  ──────  ──────  ──────
           a  0.7096  0.9468  1.0753  1.1841  1.4162
           b  1.8355  1.9207  1.9671  2.0218  2.1505
       uE[1]  0.0097  0.1053  0.2315  0.4022  0.7507
      uE[11]  0.0147  0.1604  0.3011  0.4751  0.7875
      uE[21]  0.6821  0.7960  0.8418  0.8903  0.9644
      uE[31]  0.0310  0.2739  0.5225  0.7464  0.9727
      uE[41]  0.7494  0.8445  0.8861  0.9268  0.9835
      uE[51]  0.7583  0.8626  0.9056  0.9466  0.9935
      uE[61]  0.6962  0.8367  0.8963  0.9439  0.9935
      uE[71]  0.7268  0.8246  0.8657  0.9065  0.9689
      uE[81]  0.8252  0.9122  0.9471  0.9731  0.9971

In [40]:

#@time plot(chain_ip_weibull[2:2:end]; lw=0.5, alpha=0.8)
@time plot(chain_ip_weibull[["a"; "b"; ["uE[$i]" for i in 1:10:d]]][2:2:end]; lw=0.5, alpha=0.8)

  0.252323 seconds (499.00 k allocations: 19.127 MiB, 12.07% gc time)

Out[40]:

In [41]:

dist_func = weibull_dist
waic_ip_weibull = waic(logpdf_ip, data_ip, chain_ip_weibull)
loocv_ip_weibull = loocv(logpdf_ip, data_ip, chain_ip_weibull)
@show waic_ip_weibull
@show loocv_ip_weibull;

waic_ip_weibull = (WAIC = 749.4770381373378, T = 336.23477843334075, V = 413.24225970399704)
loocv_ip_weibull = 495.36764410126403

In [42]:

@show waic_ip_gamma |> mrd
@show waic_ip_lognormal |> mrd
@show waic_ip_weibull |> mrd
println()
@show loocv_ip_gamma |> mrd
@show loocv_ip_lognormal |> mrd
@show loocv_ip_weibull |> mrd
;

waic_ip_gamma |> mrd = (WAIC = 828.091, T = 345.515, V = 482.577)
waic_ip_lognormal |> mrd = (WAIC = 972.391, T = 340.636, V = 631.755)
waic_ip_weibull |> mrd = (WAIC = 749.477, T = 336.235, V = 413.242)

loocv_ip_gamma |> mrd = 521.761
loocv_ip_lognormal |> mrd = 538.674
loocv_ip_weibull |> mrd = 495.368

"n=88" の推定とみなしたときのWAICとLOOCV (1)¶

内部パラメーター tE を数値積分で消去して計算.

In [43]:

data_ipr = hcat(tStartExposure, tEndExposure, tSymptomOnset)

function logpdf_ipr(x, w; dist_func=dist_func)
    log(quadgk(t -> pdf(dist_func(w[1], w[2]), x[3] - (x[1] + t*(x[2] - x[1]))), 0.0, 1.0)[1])
end

vars_ipr = [:a, :b]
@show vars_ipr;

vars_ipr = [:a, :b]

In [44]:

dist_func = gamma_dist
@time lp_ipr_gamma = logpdf_array(logpdf_ipr, data_ipr, chain_ip_gamma, vars_ipr)
waic_ipr_gamma = waic(lp_ipr_gamma)
loocv_ipr_gamma = loocv(lp_ipr_gamma)
@show waic_ipr_gamma
@show loocv_ipr_gamma;

  9.666607 seconds (19.82 M allocations: 556.247 MiB, 1.85% gc time)
waic_ipr_gamma = (WAIC = 599.1654924201086, T = 595.9300712884439, V = 3.2354211316647365)
loocv_ipr_gamma = 599.1791611925086

In [45]:

dist_func = lognormal_dist
@time lp_ipr_lognormal = logpdf_array(logpdf_ipr, data_ipr, chain_ip_lognormal, vars_ipr)
waic_ipr_lognormal = waic(lp_ipr_lognormal)
loocv_ipr_lognormal = loocv(lp_ipr_lognormal)
@show waic_ipr_lognormal
@show loocv_ipr_lognormal;

  3.418319 seconds (19.02 M allocations: 464.205 MiB, 3.95% gc time)
waic_ipr_lognormal = (WAIC = 599.3777169513845, T = 595.9737004437031, V = 3.404016507681404)
loocv_ipr_lognormal = 599.3925406158681

In [46]:

dist_func = weibull_dist
@time lp_ipr_weibull = logpdf_array(logpdf_ipr, data_ipr, chain_ip_weibull, vars_ipr)
waic_ipr_weibull = waic(lp_ipr_weibull)
loocv_ipr_weibull = loocv(lp_ipr_weibull)
@show waic_ipr_weibull
@show loocv_ipr_weibull;

  8.177001 seconds (18.92 M allocations: 460.334 MiB, 1.09% gc time)
waic_ipr_weibull = (WAIC = 599.5004579233479, T = 596.3009392417537, V = 3.19951868159425)
loocv_ipr_weibull = 599.5113998705508

In [47]:

@show waic_ipr_gamma |> mrd
@show waic_ipr_lognormal |> mrd
@show waic_ipr_weibull |> mrd
println()
@show loocv_ipr_gamma |> mrd
@show loocv_ipr_lognormal |> mrd
@show loocv_ipr_weibull |> mrd
;

waic_ipr_gamma |> mrd = (WAIC = 599.165, T = 595.93, V = 3.235)
waic_ipr_lognormal |> mrd = (WAIC = 599.378, T = 595.974, V = 3.404)
waic_ipr_weibull |> mrd = (WAIC = 599.5, T = 596.301, V = 3.2)

loocv_ipr_gamma |> mrd = 599.179
loocv_ipr_lognormal |> mrd = 599.393
loocv_ipr_weibull |> mrd = 599.511

潜伏期間の長さの予測分布¶

In [48]:

w_gamma = get_posterior_sample(chain_ip_gamma, vars_ipr)
function pdf_ipr_gamma(t)
    mean(pdf(gamma_dist(w_gamma[l,1], w_gamma[l,2]), t) for l in 1:size(w_gamma, 1))
end

w_lognormal = get_posterior_sample(chain_ip_lognormal, vars_ipr)
function pdf_ipr_lognormal(t)
    mean(pdf(lognormal_dist(w_lognormal[l,1], w_lognormal[l,2]), t) for l in 1:size(w_lognormal, 1))
end

w_weibull = get_posterior_sample(chain_ip_weibull, vars_ipr)
function pdf_ipr_weibull(t)
    mean(pdf(weibull_dist(w_weibull[l,1], w_weibull[l,2]), t) for l in 1:size(w_weibull, 1))
end

t = range(0, 25, length=400)
plot(size=(500, 300), legendfontsize=10)
plot!(xtick=0:25)
plot!(t, pdf_ipr_gamma.(t); label="gamma", ls=:solid)
plot!(t, pdf_ipr_lognormal.(t); label="lognormal", ls=:dash)
plot!(t, pdf_ipr_weibull.(t); label="weibull", ls=:dashdot)

Out[48]:

"n=88" の推定とみなしたときのWAICとLOOCV (2)¶

内部パラメーター uE を積分で消去した結果を cdf 函数で表示して計算.

擬似確率プログラミング言語でモデルは以下のように書ける:

$$ \begin{aligned} & a \sim \operatorname{Flat}(\mathbb{R}), \\ & b \sim \operatorname{Flat}(\mathbb{R}), \\ & u_k \sim \operatorname{Uniform}(0, 1), \\ & t_{\text{onset},k} - (t_{\text{start},k} + u_k(t_{\text{end},k} - t_{\text{start},k}))\sim \operatorname{ModelDist}(a, b) \quad (k=1,\ldots,n) \end{aligned} $$

ここで, $t_{\text{start},k}, t_{\text{end},k}, t_{\text{onset}, k}$ がデータで, $a, b, u_k$ がモデルのパラメータであり, $\operatorname{ModelDist}$ は Gamma, LogNormal, Weibullのどれかである.

このモデルの内部パラメータ $u_k$ を消去しよう. まず, $\operatorname{IntegratedModelDist}(t_{\text{onset}}|a, b, t_{\text{start}}, t_{\text{end}})$ を

$$ \begin{aligned} & p_{\operatorname{IntegratedModelDist}}(t_{\text{onset}}|t_{\text{start}},t_{\text{end}},a,b) \\ & = \int_0^1 p_{\operatorname{ModelDist}} (t_{\text{onset}} - (t_{\text{start}} + u(t_{\text{end}} - t_{\text{start}}))|a,b) \,du \\ & = \begin{cases} p_{\operatorname{ModelDist}} (t_{\text{onset}} - t_{\text{start}}|a,b) & (t_{\text{end}} = t_{\text{start}}) \\ \dfrac { F_{\operatorname{ModelDist}}(t_{\text{onset}} - t_{\text{end}}|a,b) - F_{\operatorname{ModelDist}}(t_{\text{onset}} - t_{\text{start}}|a,b) } {-t_{\text{end}} + t_{\text{start}}} & (\text{otherwise}) \end{cases} \end{aligned} $$

と定義する. ここで $F$ は累積分布函数を表す. 積分によって $u$ を削除したモデルは以下のように書ける:

$$ \begin{aligned} & a \sim \operatorname{Flat}(\mathbb{R}), \\ & b \sim \operatorname{Flat}(\mathbb{R}), \\ & t_{\text{onset},k} \sim \operatorname{IntegratedModelDist}(t_{\text{start},k}, t_{\text{end},k}, a, b) \quad (k=1,\ldots,n) \end{aligned} $$

このパラメータが2個のモデルとして, WAICやLOOCVは計算されなければいけない.

In [49]:

data_ip2 = hcat(tStartExposure, tEndExposure, tSymptomOnset)

function logpdf_ip2(x, w; dist_func=dist_func)
    z = if x[1] == x[2]
        pdf(dist_func(w[1], w[2]), x[3] - x[1])
    else
        (cdf(dist_func(w[1], w[2]), x[3] - x[2]) - cdf(dist_func(w[1], w[2]), x[3] - x[1]))/(-x[2] + x[1])
    end
    log(z)
end

vars_ip2 = [:a, :b]
@show vars_ip2;

vars_ip2 = [:a, :b]

In [50]:

k, l = 67, 1234

println((k = k, data_ip2_k_1 = data_ip2[k,1], data_ip2_k_2 = data_ip2[k,2]))

dist_func = gamma_dist
ps_gamma = get_posterior_sample(chain_ip_gamma, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_gamma[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_gamma[l,:])

dist_func = lognormal_dist
ps_gamma = get_posterior_sample(chain_ip_lognormal, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_gamma[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_gamma[l,:])

dist_func = weibull_dist
ps_gamma = get_posterior_sample(chain_ip_weibull, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_gamma[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_gamma[l,:])
;

(k = 67, data_ip2_k_1 = 19, data_ip2_k_2 = 19)
logpdf_ipr(data_ip2[k, :], ps_gamma[l, :]) = -2.0502878321516813
logpdf_ip2(data_ip2[k, :], ps_gamma[l, :]) = -2.0502878321516813
logpdf_ipr(data_ip2[k, :], ps_gamma[l, :]) = -2.057468864082485
logpdf_ip2(data_ip2[k, :], ps_gamma[l, :]) = -2.057468864082485
logpdf_ipr(data_ip2[k, :], ps_gamma[l, :]) = -2.0894116910323914
logpdf_ip2(data_ip2[k, :], ps_gamma[l, :]) = -2.0894116910323914

In [51]:

k, l = 10, 1234

println((k = k, data_ip2_k_1 = data_ip2[k,1], data_ip2_k_2 = data_ip2[k,2]))

dist_func = gamma_dist
ps_gamma = get_posterior_sample(chain_ip_gamma, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_gamma[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_gamma[l,:])

dist_func = lognormal_dist
ps_lognormal = get_posterior_sample(chain_ip_lognormal, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_lognormal[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_lognormal[l,:])

dist_func = weibull_dist
ps_weibull = get_posterior_sample(chain_ip_weibull, vars_ip2)
@show logpdf_ipr(data_ip2[k,:], ps_weibull[l,:])
@show logpdf_ip2(data_ip2[k,:], ps_weibull[l,:])
;

(k = 10, data_ip2_k_1 = -18, data_ip2_k_2 = 15)
logpdf_ipr(data_ip2[k, :], ps_gamma[l, :]) = -3.497768038056459
logpdf_ip2(data_ip2[k, :], ps_gamma[l, :]) = -3.4977680380564586
logpdf_ipr(data_ip2[k, :], ps_lognormal[l, :]) = -3.4972477778287336
logpdf_ip2(data_ip2[k, :], ps_lognormal[l, :]) = -3.4972477778287336
logpdf_ipr(data_ip2[k, :], ps_weibull[l, :]) = -3.5045217380470794
logpdf_ip2(data_ip2[k, :], ps_weibull[l, :]) = -3.50452173804708

In [52]:

dist_func = gamma_dist
@time lp_ip2_gamma = logpdf_array(logpdf_ip2, data_ip2, chain_ip_gamma, vars_ip2)
waic_ip2_gamma = waic(lp_ip2_gamma)
loocv_ip2_gamma = loocv(lp_ip2_gamma)
@show waic_ip2_gamma
@show loocv_ip2_gamma;

  0.467945 seconds (934.50 k allocations: 35.991 MiB, 8.06% gc time)
waic_ip2_gamma = (WAIC = 599.1654924201018, T = 595.9300712884362, V = 3.235421131665539)
loocv_ip2_gamma = 599.1791611925016

In [53]:

dist_func = lognormal_dist
@time lp_ip2_lognormal = logpdf_array(logpdf_ip2, data_ip2, chain_ip_lognormal, vars_ip2)
waic_ip2_lognormal = waic(lp_ip2_lognormal)
loocv_ip2_lognormal = loocv(lp_ip2_lognormal)
@show waic_ip2_lognormal
@show loocv_ip2_lognormal;

  0.130799 seconds (867.63 k allocations: 32.887 MiB)
waic_ip2_lognormal = (WAIC = 599.3777169514955, T = 595.9737004438692, V = 3.404016507626242)
loocv_ip2_lognormal = 599.3925406159791

In [54]:

dist_func = weibull_dist
@time lp_ip2_weibull = logpdf_array(logpdf_ip2, data_ip2, chain_ip_weibull, vars_ip2)
waic_ip2_weibull = waic(lp_ip2_weibull)
loocv_ip2_weibull = loocv(lp_ip2_weibull)
@show waic_ip2_weibull
@show loocv_ip2_weibull;

  0.170888 seconds (867.16 k allocations: 32.866 MiB)
waic_ip2_weibull = (WAIC = 599.5004579231576, T = 596.3009392415395, V = 3.1995186816180534)
loocv_ip2_weibull = 599.5113998703594

In [55]:

@show waic_ip2_gamma |> mrd
@show waic_ip2_lognormal |> mrd
@show waic_ip2_weibull |> mrd
println()
@show loocv_ip2_gamma |> mrd
@show loocv_ip2_lognormal |> mrd
@show loocv_ip2_weibull |> mrd
;

waic_ip2_gamma |> mrd = (WAIC = 599.165, T = 595.93, V = 3.235)
waic_ip2_lognormal |> mrd = (WAIC = 599.378, T = 595.974, V = 3.404)
waic_ip2_weibull |> mrd = (WAIC = 599.5, T = 596.301, V = 3.2)

loocv_ip2_gamma |> mrd = 599.179
loocv_ip2_lognormal |> mrd = 599.393
loocv_ip2_weibull |> mrd = 599.511

計算速度が圧倒的に速くなっていてかつ, 以下に再掲する数値積分(quadgk函数)を使った計算の結果(既出)と値が一致している.

In [56]:

@show waic_ipr_gamma |> mrd
@show waic_ipr_lognormal |> mrd
@show waic_ipr_weibull |> mrd
println()
@show loocv_ipr_gamma |> mrd
@show loocv_ipr_lognormal |> mrd
@show loocv_ipr_weibull |> mrd
;

waic_ipr_gamma |> mrd = (WAIC = 599.165, T = 595.93, V = 3.235)
waic_ipr_lognormal |> mrd = (WAIC = 599.378, T = 595.974, V = 3.404)
waic_ipr_weibull |> mrd = (WAIC = 599.5, T = 596.301, V = 3.2)

loocv_ipr_gamma |> mrd = 599.179
loocv_ipr_lognormal |> mrd = 599.393
loocv_ipr_weibull |> mrd = 599.511

In [ ]:

TuringでWAICやLOOCVを計算する方法¶

目次

WAICやLOOCVを計算する函数の定義¶

Turingが生成するchainから事後分布のサンプルを抽出¶

WAICを計算する函数¶

LOOCVを計算する函数¶

予測分布のpdfを作る函数¶

ベイズ版最小二乗法¶

複数の確率モデルの比較¶

COVID-19 の潜伏期間の推定¶

"n=1" の推定とみなしたときのWAICとLOOCV¶

"n=88" の推定とみなしたときのWAICとLOOCV (1)¶

潜伏期間の長さの予測分布¶

"n=88" の推定とみなしたときのWAICとLOOCV (2)¶