黒木玄
2017-11-16~2018-01-13
We write the probability density function of the normal distribution as
$$\displaystyle N(\mu,\sigma) = \frac{\exp(-(x-\mu)^2/(2\sigma^2))}{\sqrt{2\pi\sigma^2}}. $$
For samples generated from the standard normal distribution $N(0,1)$, we compare estimation by three models: the two-component mixture normal model mixnormal, with both variances fixed to 1,
$$\displaystyle y \sim (1-a)N(b,1) + a N(c,1), $$
the normal model normal1 with variance fixed to 1,
$$\displaystyle y \sim N(\mu,1), $$
and the two-parameter normal model normal in which the variance also varies,
$$\displaystyle y \sim N(\mu,\sigma). $$
The data files created in this notebook can be downloaded from
https://genkuroki.github.io/documents/Jupyter/#2017-11-16
The analysis results can be viewed at the following locations:
n=8 http://nbviewer.jupyter.org/gist/genkuroki/cee2962916ec9269c194468a3e5cf288
n=32 http://nbviewer.jupyter.org/gist/genkuroki/41dd5005f143bda6cc678a243bca41c0
n=128 http://nbviewer.jupyter.org/gist/genkuroki/d2978887f61287d54e58baa700d2c002
Warning (2018-01-13): The numerical method used for WBIC in this notebook is not appropriate, because it applies a formula for WBIC that holds, purely mathematically, at inverse temperature β=1. As the experiments here showed (and, on reflection, unsurprisingly), approximating WBIC, which is defined as a posterior average at inverse temperature β=1/log(n), using a sample drawn from the posterior at inverse temperature β=1 produces large errors.
For how to perform Bayesian estimation at inverse temperature β with the Mamba.jl package for the Julia language, see
http://nbviewer.jupyter.org/gist/genkuroki/8e97ef72ec01857564bd1a68e6e63d64
For an experiment using the exact formula for WBIC in the case of a simple normal distribution model at inverse temperature β, see
http://nbviewer.jupyter.org/gist/genkuroki/8a342d0b7b249e279dd8ad6ae283c5db
versioninfo()
Julia Version 0.6.0
Commit 903644385b* (2017-06-19 13:05 UTC)
Platform Info:
  OS: Windows (x86_64-w64-mingw32)
  CPU: Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, haswell)
@time begin
using Mamba
using KernelDensity
# makefunc_pdfkde(X): return a pdf function backed by a kernel density estimate of the 1D sample X
function makefunc_pdfkde(X)
ik = InterpKDE(kde(X))
pdfkde(x) = pdf(ik, x)
return pdfkde
end
# makefunc_pdfkde(X, Y): 2D version, a KDE-based joint pdf of the paired samples (X, Y)
function makefunc_pdfkde(X,Y)
ik = InterpKDE(kde((X,Y)))
pdfkde(x, y) = pdf(ik, x, y)
return pdfkde
end
using Optim
optim_options = Optim.Options(
store_trace = true,
extended_trace = true
)
using QuadGK
import PyPlot
plt = PyPlot
using Distributions
@everywhere GTDist(μ, ρ, ν) = LocationScale(Float64(μ), Float64(ρ), TDist(Float64(ν)))
@everywhere GTDist(ρ, ν) = GTDist(zero(ρ), ρ, ν)
using JLD2
using FileIO
end
# @sum(f(k), k, itr): sum f(k) over k in itr, expanded into an explicit loop
macro sum(f_k, k, itr)
quote
begin
s = zero(($(esc(k))->$(esc(f_k)))($(esc(itr))[1]))
for $(esc(k)) in $(esc(itr))
s += $(esc(f_k))
end
s
end
end
end
60.473134 seconds (15.80 M allocations: 1.103 GiB, 0.79% gc time)
@sum (macro with 1 method)
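A minimal usage sketch of the @sum macro (hypothetical toy values, not from this notebook's runs); the macro expands to a plain loop rather than a generator:

@sum(k^2, k, 1:3)   # expands to an explicit loop computing 1 + 4 + 9 = 14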
@everywhere mixnormal(a,b,c) = MixtureModel(Normal[Normal(b, 1.0), Normal(c, 1.0)], [1.0-a, a])
mixnormal(w::AbstractVector) = mixnormal(w[1], w[2], w[3])
unlink_mixnormal(w) = [logit(w[1]), w[2], w[3]] # unlink_mixnormal : (0,1)×R^2 -> R^3
link_mixnormal(z) = [invlogit(z[1]), z[2], z[3]] # link_mixnormal : R^3 → (0,1)×R^2
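A quick sanity check of the link pair (a sketch; logit and invlogit are the functions exported by Mamba, and the test vector is hypothetical):

w = [0.3, -1.0, 2.0]
link_mixnormal(unlink_mixnormal(w)) ≈ w   # true: the two maps are mutually inverse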
function sample2model_mixnormal(Y;
dist_model = mixnormal,
a0 = 0.5,
b0 = 0.0,
c0 = 0.0,
prior_a = Uniform(0.0, 1.0),
prior_b = Normal(0.0, 1.0),
prior_c = Normal(0.0, 1.0),
chains = 2
)
data = Dict{Symbol, Any}(
:Y => Y,
:n => length(Y),
:a0 => a0,
:b0 => b0,
:c0 => c0,
)
model = Model(
y = Stochastic(1, (a, b, c) -> dist_model(a, b, c), false),
a = Stochastic(() -> prior_a, true),
b = Stochastic(() -> prior_b, true),
c = Stochastic(() -> prior_c, true),
)
scheme = [
NUTS([:a, :b, :c])
]
setsamplers!(model, scheme)
inits = [
Dict{Symbol, Any}(
:y => data[:Y],
:a => data[:a0],
:b => data[:b0],
:c => data[:c0],
)
for k in 1:chains
]
return model, data, inits
end
sample2model_mixnormal (generic function with 1 method)
@everywhere normal(mu, sigma) = Normal(mu, sigma)
normal(w::AbstractVector) = normal(w[1], w[2])
unlink_normal(w) = [w[1], log(w[2])] # unlink_normal : R×(0,∞) -> R^2
link_normal(z) = [z[1], exp(z[2])] # link_normal : R^2 → R×(0,∞)
function sample2model_normal(Y;
dist_model = normal,
mu0 = 0.0,
sigma0 = 1.0,
prior_mu = Normal(0.0, 1.0),
prior_sigma = Truncated(Normal(1.0, 1.0), 0, Inf),
chains = 2
)
data = Dict{Symbol, Any}(
:Y => Y,
:n => length(Y),
:mu0 => mu0,
:sigma0 => sigma0,
)
model = Model(
y = Stochastic(1, (mu, sigma) -> dist_model(mu, sigma), false),
mu = Stochastic(() -> prior_mu, true),
sigma = Stochastic(() -> prior_sigma, true),
)
scheme = [
NUTS([:mu, :sigma])
]
setsamplers!(model, scheme)
inits = [
Dict{Symbol, Any}(
:y => data[:Y],
:mu => data[:mu0],
:sigma => data[:sigma0],
)
for k in 1:chains
]
return model, data, inits
end
sample2model_normal (generic function with 1 method)
@everywhere normal1(mu::Real) = Normal(mu, 1.0)
normal1(w::AbstractVector) = normal1(w[1])
unlink_normal1(w) = w
link_normal1(z) = z
function sample2model_normal1(Y;
dist_model = normal1,
mu0 = 0.0,
prior_mu = Normal(0.0, 1.0),
chains = 2
)
data = Dict{Symbol, Any}(
:Y => Y,
:n => length(Y),
:mu0 => mu0,
)
model = Model(
y = Stochastic(1, mu -> dist_model(mu), false),
mu = Stochastic(() -> prior_mu, true),
)
scheme = [
NUTS([:mu])
]
setsamplers!(model, scheme)
inits = [
Dict{Symbol, Any}(
:y => data[:Y],
:mu => data[:mu0],
)
for k in 1:chains
]
return model, data, inits
end
sample2model_normal1 (generic function with 1 method)
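A minimal usage sketch for any of the three constructors (hypothetical toy sample; mcmc is Mamba's sampling driver, called the same way as in the @simonce macro below):

Y = randn(16)   # hypothetical toy sample
model, data, inits = sample2model_normal1(Y, chains=2)
sim = mcmc(model, data, inits, 500, burnin=100, thin=1, chains=2, verbose=false)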
## Summary
function showsummary(sim;
sortkeys=true, figsize_t=(8, 3), figsize_c=(8, 3.5),
show_describe=true, show_gelman=true, plot_trace=true, plot_contour=true)
## Summary of MCMC
if show_describe
println("\n========== Summary:\n")
display(describe(sim))
end
# Convergence Diagnostics
if show_gelman && length(sim.chains) ≥ 2
println("========== Gelman Diagnostic:")
show(gelmandiag(sim))
end
## Plot
sleep(0.1)
if plot_trace
#draw(plot(sim), fmt=:png, width=10inch, height=3.5inch, nrow=1, ncol=2, ask=false)
pyplot_trace(sim, sortkeys=sortkeys, figsize=figsize_t)
end
if plot_contour
#draw(plot(sim, :contour), fmt=:png, width=10inch, height=4.5inch, nrow=1, ncol=2, ask=false)
pyplot_contour(sim, sortkeys=sortkeys, figsize=figsize_c)
end
end
## plot traces
function pyplot_trace(sim; sortkeys = false, figsize = (8, 3))
if sortkeys
keys_sim = sort(keys(sim))
else
keys_sim = keys(sim)
end
for var in keys_sim
plt.figure(figsize=figsize)
plt.subplot(1,2,1)
for k in sim.chains
plt.plot(sim.range, sim[:,var,:].value[:,1,k], label="$k", lw=0.4, alpha=0.8)
end
plt.xlabel("iterations")
plt.grid(ls=":")
#plt.legend(loc="upper right")
plt.title("trace of $var", fontsize=10)
plt.subplot(1,2,2)
xmin = quantile(vec(sim[:,var,:].value), 0.005)
xmax = quantile(vec(sim[:,var,:].value), 0.995)
for k in sim.chains
chain = sim[:,var,:].value[:,1,k]
pdfkde = makefunc_pdfkde(chain)
x = linspace(xmin, xmax, 201)
plt.plot(x, pdfkde.(x), label="$k", lw=0.8, alpha=0.8)
end
plt.xlabel("$var")
plt.grid(ls=":")
plt.title("empirical posterior pdf of $var", fontsize=10)
plt.tight_layout()
end
end
# plot contours
function pyplot_contour(sim; sortkeys = true, figsize = (8, 3.5))
if sortkeys
keys_sim = sort(keys(sim))
else
keys_sim = keys(sim)
end
m = 0
K = length(keys_sim)
for i in 1:K
for j in i+1:K
m += 1
mod1(m,2) == 1 && plt.figure(figsize=figsize)
plt.subplot(1,2, mod1(m,2))
u = keys_sim[i]
v = keys_sim[j]
X = vec(sim[:,u,:].value)
Y = vec(sim[:,v,:].value)
pdfkde = makefunc_pdfkde(X,Y)
xmin = quantile(X, 0.005)
xmax = quantile(X, 0.995)
ymin = quantile(Y, 0.005)
ymax = quantile(Y, 0.995)
x = linspace(xmin, xmax, 200)
y = linspace(ymin, ymax, 200)
plt.pcolormesh(x', y, pdfkde.(x',y), cmap="CMRmap")
plt.colorbar()
plt.grid(ls=":")
plt.xlabel(u)
plt.ylabel(v)
plt.title("posterior of ($u, $v)", fontsize=10)
mod1(m,2) == 2 && plt.tight_layout()
if 2*m == K*(K-1) && mod1(m,2) == 1
plt.subplot(1,2,2)
plt.pcolormesh(y', x, pdfkde.(x,y'), cmap="CMRmap")
plt.colorbar()
plt.grid(ls=":")
plt.xlabel(v)
plt.ylabel(u)
plt.title("posterior of ($v, $u)", fontsize=10)
plt.tight_layout()
end
end
end
end
pyplot_contour (generic function with 1 method)
Below, we explain the definitions of the various information criteria and how to compute them.
There are two conventional scales for displaying information criteria: the Kullback-Leibler divergence scale and the scale of the chi-squared test of log-likelihood ratios. Below we follow the latter, traditional scale. To convert from the log-likelihood-ratio chi-squared scale to the KL scale, divide by twice the sample size.
Let $X_1,\ldots,X_n$ be a sample of size $n$, let $d$ be the number of parameters of the probabilistic model $p(x|w)$ (where $w=(w_1,\ldots,w_d)$), and let $\hat{w}$ be the parameter attaining the maximum likelihood:
$$\displaystyle \min_w \left(-\sum_{k=1}^n \log p(X_k|w)\right) = -\sum_{k=1}^n \log p(X_k|\hat{w}). $$
The predictive distribution of maximum likelihood estimation:
$$\displaystyle p^*_\mathrm{MLE}(x) = p(x|\hat{w}). $$
How to compute AIC:
$$\displaystyle \mathrm{AIC} := 2\left(-\sum_{k=1}^n\log p(X_k|\hat{w})\right) + 2d. $$
How to compute BIC:
$$\displaystyle \mathrm{BIC} := 2\left(-\sum_{k=1}^n\log p(X_k|\hat{w})\right) + d\log n. $$
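As a concrete illustration for the one-parameter model normal1 defined above (a sketch with a hypothetical sample; the MLE of $\mu$ is the sample mean):

using Distributions
Y = randn(32)                               # hypothetical sample
muhat = mean(Y)                             # MLE of μ in the normal1 model
nll = -sum(logpdf.(Normal(muhat, 1.0), Y))  # minimized negative log-likelihood
AIC = 2nll + 2*1                            # d = 1
BIC = 2nll + 1*log(length(Y))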
Write the sample from the posterior distribution obtained by MCMC as $w_1,w_2,\ldots,w_L$.
The posterior predictive distribution, WAIC, LOOCV (leave-one-out cross validation), and WBIC can all be approximated from the array of log-likelihood values
$$ \{\,\log p(X_k|w_l)\mid k=1,2,\ldots,n,\; l=1,2,\ldots,L\,\} $$
alone. The first task in computing the posterior predictive distribution, WAIC, LOOCV, and WBIC is therefore to extract this array from the MCMC output. Once it has been extracted, they are computed mechanically by the formulas below.
The posterior predictive distribution of Bayesian estimation:
$$\displaystyle p^*_\mathrm{Bayesian}(x) := (\mathrm{posterior\ mean\ of}\ p(x|w)) \approx \left(\mathrm{mean\ of}\ \{p(x|w_l)\}_{l=1}^L\right) = \left(\mathrm{mean\ of}\ \{\exp(\log p(x|w_l))\}_{l=1}^L\right). $$
How to compute WAIC:
$$\displaystyle \mathrm{WAIC} := T_{\mathrm{WAIC}} + V_{\mathrm{WAIC}}, $$where $$\begin{aligned} & T_{\mathrm{WAIC}} := -2\sum_{k=1}^n \log p^*(X_k) \approx -2\sum_{k=1}^n\log\left(\mathrm{mean\ of}\ \{\exp(\log p(X_k|w_l))\}_{l=1}^L\right), \\ & V_{\mathrm{WAIC}} := 2\sum_{k=1}^n \left(\mathrm{posterior\ variance\ of}\ \log p(X_k|w)\right) \approx 2\sum_{k=1}^n \left(\mathrm{variance\ of}\ \{\log p(X_k|w_l)\}_{l=1}^L\right). \end{aligned}$$
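In code, given an L×n array loglik with loglik[l,k] = log p(X_k|w_l) (such an array is constructed later in this notebook by makefunc_loglikchainof), the two terms can be transcribed directly; a sketch:

L, n = size(loglik)
T = -2*sum(log(mean(exp.(loglik[:,k]))) for k in 1:n)      # -2 Σ_k log(posterior mean of the likelihood)
V = 2*sum(var(loglik[:,k], corrected=false) for k in 1:n)  # 2 Σ_k posterior variance of the log-likelihood
WAIC = T + V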
The naive way to compute LOOCV (leave-one-out cross validation):
$$\begin{aligned} \mathrm{LOOCV} &:= -2\sum_{k=1}^n \log E^{(k)}_w[p(X_k|w)] = 2\sum_{k=1}^n \log E_w\left[\frac{1}{p(X_k|w)}\right] \\ & \approx 2\sum_{k=1}^n \log\left(\mathrm{mean\ of}\ \{\exp(-\log p(X_k|w_l))\}_{l=1}^L\right), \end{aligned}$$where
$$\begin{aligned} & Z_n := \int \prod_{j=1}^n p(X_j|w)\;\varphi(w)\,dw, \\ & E_w[f(w)] := \frac{1}{Z_n}\int f(w)\prod_{j=1}^n p(X_j|w)\;\varphi(w)\,dw \approx \left(\mathrm{mean\ of}\ \{f(w_l)\}_{l=1}^L\right), \\ & Z^{(k)}_n := \int \frac{\prod_{j=1}^n p(X_j|w)}{p(X_k|w)}\varphi(w)\,dw = Z_n E_w\left[\frac{1}{p(X_k|w)}\right], \\ & E^{(k)}_w[f(w)] := \frac{1}{Z^{(k)}_n}\int f(w)\frac{\prod_{j=1}^n p(X_j|w)}{p(X_k|w)}\varphi(w)\,dw = \frac{Z_n}{Z^{(k)}_n}E_w\left[\frac{f(w)}{p(X_k|w)}\right] = \frac{E_w[f(w)/p(X_k|w)]}{E_w[1/p(X_k|w)]}, \\ & \log E^{(k)}_w[p(X_k|w)] = \log\frac{E_w[1]}{E_w[1/p(X_k|w)]} = -\log E_w\left[\frac{1}{p(X_k|w)}\right] \\ & \qquad \approx -\log\left(\mathrm{mean\ of}\ \left\{\frac{1}{p(X_k|w_l)}\right\}_{l=1}^L\right) = -\log\left(\mathrm{mean\ of}\ \{\exp(-\log p(X_k|w_l))\}_{l=1}^L\right). \end{aligned}$$How to compute WBIC: set $\beta=1/\log n$, and write
$$\displaystyle \mathrm{WBIC} := 2E^{\beta}_w\left[nL_n(w)\right] = 2\frac{N_{\mathrm{WBIC}}}{D_{\mathrm{WBIC}}}, $$where
$$\begin{aligned} nL_n(w) &:= -\sum_{k=1}^n \log p(X_k|w), \quad nL_n(w_l) = -\sum_{k=1}^n \log p(X_k|w_l), \\ Z_n(\beta) &:= \int \left(\prod_{k=1}^n p(X_k|w)\right)^\beta\;\varphi(w)\,dw, \quad Z_n(\beta)E^{\beta}_w\left[f(w)\right] := \int f(w)\left(\prod_{k=1}^n p(X_k|w)\right)^\beta\;\varphi(w)\,dw \\ N_{\mathrm{WBIC}} &:= \frac{Z_n(\beta)E^{\beta}_w\left[nL_n(w)\right]}{Z_n(1)} = \frac{1}{Z_n(1)} \int nL_n(w)\left(\prod_{k=1}^n p(X_k|w)\right)^{\beta-1} \prod_{k=1}^n p(X_k|w)\;\varphi(w)\,dw \\ & \approx \left(\mathrm{mean\ of}\ \left\{nL_n(w_l) \exp(-(\beta-1)nL_n(w_l))\right\}_{l=1}^L\right), \\ D_{\mathrm{WBIC}} &:= \frac{Z_n(\beta)}{Z_n(1)} = \frac{1}{Z_n(1)}\int \left(\prod_{k=1}^n p(X_k|w)\right)^{\beta-1} \prod_{k=1}^n p(X_k|w)\;\varphi(w)\,dw \\ & \approx \left(\mathrm{mean\ of}\ \left\{\exp(-(\beta-1)nL_n(w_l))\right\}_{l=1}^L\right). \end{aligned}$$Thus WBIC can be approximated from only the values $nL_n(w_l)$, that is, $-1$ times the log-likelihood evaluated at each draw from the posterior distribution at inverse temperature $1$.
Note that approximating WBIC this way does not require constructing a sample from the posterior distribution at inverse temperature $\beta$!
Reference: the above method of computing WBIC uses equation (20) of
http://www.jmlr.org/papers/volume14/watanabe13a/watanabe13a.pdf p.877 (p.11 in file)
applied with $\beta_1=1$ and $\beta_2=\beta$.
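In code, the ratio $N_{\mathrm{WBIC}}/D_{\mathrm{WBIC}}$ can be transcribed as follows (a sketch; the shift by the maximum cancels in the ratio and avoids overflow, as in the makefunc_E2nLn function defined later in this notebook):

L, n = size(loglik)                        # loglik[l,k] = log p(X_k|w_l), as above
nLn = -vec(sum(loglik, 2))                 # nL_n(w_l) for each posterior draw
β = 1/log(n)
u = exp.((1 - β)*(nLn .- maximum(nLn)))    # stabilized weights; every exponent is ≤ 0
WBIC = 2*sum(nLn .* u)/sum(u)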
Memo (2017-11-10): The WBIC values computed in this notebook depend strongly on the MCMC output and have large variance. It is possible that something is being done incorrectly here.
Addendum (2017-11-12, a more accurate way to compute the free energy): The first step in the derivation of WBIC is to show that the derivative of the free energy $F_n(\beta)$ with respect to the inverse temperature $\beta$ equals the average of $nL_n(w)$ over the posterior distribution at inverse temperature $\beta$; combined with $F_n(0)=0$, this yields
$$\displaystyle F_n(1) = \int_0^1 E^\beta_w[nL_n(w)] \,d\beta. \tag{$*$} $$
(This step is very easy.) The second step of the derivation is to show that the integral on the right-hand side can be approximated by $E^\beta_w[nL_n(w)]$ at $\beta=1/\log n$ (this step is somewhat nontrivial).
Derivation of ($*$): $$ \begin{aligned} nL_n(w) &:= -\sum_{k=1}^n \log p(X_k|w), \\ Z_n(\beta) &:= \int \left(\prod_{k=1}^n p(X_k|w)\right)^\beta \varphi(w)\,dw = \int\exp\left(-\beta nL_n(w)\right)\varphi(w)\,dw, \\ Z_n'(\beta) &= -\int nL_n(w)\exp\left(-\beta nL_n(w)\right)\varphi(w)\,dw, \\ E^\beta_w[f(w)] &:= \frac{1}{Z_n(\beta)}\int f(w)\left(\prod_{k=1}^n p(X_k|w)\right)^\beta \varphi(w)\,dw = \frac{\int f(w)\exp\left(-\beta nL_n(w)\right)\varphi(w)\,dw}{Z_n(\beta)}, \\ F_n(\beta) &:= -\log Z_n(\beta), \\ F_n(0) &= -\log Z_n(0) = -\log\int\varphi(w)\,dw = -\log 1 = 0, \\ F_n'(\beta) &= \frac{-Z_n'(\beta)}{Z_n(\beta)} = \frac{\int nL_n(w)\exp\left(-\beta nL_n(w)\right)\varphi(w)\,dw}{Z_n(\beta)} = E^\beta_w[nL_n(w)]. \\ \therefore \quad F_n(1) &= \int_0^1 E^\beta_w[nL_n(w)]\,d\beta. \end{aligned} $$
Numerically integrating $E^\beta_w[nL_n(w)]$ over $\beta$ thus yields the free energy $F_n(1)$. The quadrature adds some computational cost, but since the integrand is a function of the single variable $\beta$, this is vastly cheaper than integrating a multivariate function over the model's parameter space.
$E^\beta_w[nL_n(w)]$ can be expressed as
$$\displaystyle E^{\beta}_w\left[nL_n(w)\right] = \frac{N(\beta)}{D(\beta)}, $$where
$$\begin{aligned} N(\beta) &= \frac{Z_n(\beta)E^{\beta}_w\left[nL_n(w)\right]}{Z_n(1)} = \frac{1}{Z_n(1)} \int nL_n(w)\left(\prod_{k=1}^n p(X_k|w)\right)^{\beta-1} \prod_{k=1}^n p(X_k|w)\;\varphi(w)\,dw \\ & \approx \left(\mathrm{mean\ of}\ \left\{nL_n(w_l) \exp(-(\beta-1)nL_n(w_l))\right\}_{l=1}^L\right), \\ D(\beta) &= \frac{Z_n(\beta)}{Z_n(1)} = \frac{1}{Z_n(1)} \int \left(\prod_{k=1}^n p(X_k|w)\right)^{\beta-1} \prod_{k=1}^n p(X_k|w)\;\varphi(w)\,dw \\ & \approx \left(\mathrm{mean\ of}\ \left\{\exp(-(\beta-1)nL_n(w_l))\right\}_{l=1}^L\right). \end{aligned}$$Note: to match the traditional AIC scale it suffices to compute
$$\displaystyle 2F_n(1) = 2\int_0^1 E^{\beta}_w\left[nL_n(w)\right]\,d\beta = 2\int_0^1\frac{N(\beta)}{D(\beta)}\,d\beta. $$The cells below define a function that computes the free energy on this scale by this method.
# Function that builds loglikchainof, which extracts loglik[l,i] = lpdf(w_l, Y[i]) and chain[l,:] = w_l
#
function makefunc_loglikchainof(lpdf, symbols, Y)
#
# loglikchainof(sim) extracts loglik[l,i] and chain[l,:]
#
function loglikchainof(sim)
val = sim[:, symbols, :].value
chain = vcat((val[:,:,k] for k in 1:size(val,3))...)
L = size(chain,1)
n = length(Y)
loglik = Array{Float64, 2}(L, n)
for i in 1:n
for l in 1:L
loglik[l,i] = lpdf(chain[l,:], Y[i])
end
end
return loglik, chain
end
return loglikchainof
end
# Function that builds the predictive density p^*(y) = mean of { exp(lpdf(w_l, y)) }_{l=1}^L
#
function makefunc_pdfpred(lpdf, chain)
L = size(chain,1)
pred_Bayes(y) = @sum(exp(lpdf((@view chain[l,:]), y)), l, 1:L)/L
return pred_Bayes
end
# Compute WAIC from loglik[l,i]
#
function WAICof(loglik)
L, n = size(loglik)
T_n = -mean(log(mean(exp(loglik[l,i]) for l in 1:L)) for i in 1:n)
V_n = sum(var(loglik[:,i], corrected=false) for i in 1:n)
WAIC = 2*n*T_n + 2*V_n
WAIC, 2*n*T_n, 2*V_n
end
# Naive computation of LOOCV from loglik[l,i]
#
function LOOCVof(loglik)
L, n = size(loglik)
LOOCV = 2*sum(log(mean(exp(-loglik[l,i]) for l in 1:L)) for i in 1:n)
return LOOCV
end
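The naive form above exponentiates $-\log p(X_k|w_l)$ directly and can overflow when the log-likelihoods are large in magnitude; a numerically safer variant (my sketch, not the notebook's method) applies the usual log-sum-exp shift:

function LOOCVof_stable(loglik)
    L, n = size(loglik)
    # log-mean-exp with the maximum shifted out for numerical stability
    logmeanexp(v) = (m = maximum(v); m + log(mean(exp.(v .- m))))
    return 2*sum(logmeanexp(-loglik[:,i]) for i in 1:n)
end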
# Functions for computing (twice) the free energy
#
# The derivative of twice the free energy with respect to the inverse temperature equals E^β_w[2n L_n],
# so numerically integrating it over β from 0.0 to 1.0 yields twice the free energy.
#
function FreeEnergyof(loglik)
E2nLn = makefunc_E2nLn(loglik)
F = quadgk(E2nLn, 0.0, 1.0)[1]
return F, E2nLn
end
function makefunc_E2nLn(loglik)
L = size(loglik)[1]
negloglik = -sum(loglik, 2)
negloglik_n = negloglik .- maximum(negloglik)
function E2nLn(beta)
Edenominator = @sum( exp((1-beta)*negloglik_n[l]), l, 1:L)/L
if Edenominator == zero(Edenominator) || !isfinite(Edenominator)
return zero(Edenominator)
end
Enumerator = @sum(negloglik[l]*exp((1-beta)*negloglik_n[l]), l, 1:L)/L
return 2*Enumerator/Edenominator
end
return E2nLn
end
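Usage sketch (assuming a loglik array as above): FreeEnergyof also returns the integrand, which can be evaluated at any β, e.g. to inspect its shape before integrating.

F, E2nLn = FreeEnergyof(loglik)
E2nLn(0.5)   # the integrand E^β_w[2n L_n] at β = 0.5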
# Compute WBIC from loglik[l,i]
#
function WBICof(loglik)
E2nLn = makefunc_E2nLn(loglik)
n = size(loglik, 2)
WBIC = E2nLn(1/log(n))
return WBIC
end
# Compute the generalization loss
#
function GeneralizationLossof(pdftrue, pdfpred; xmin=-10.0, xmax=10.0)
f(x) = -pdftrue(x)*log(pdfpred(x))
return quadgk(f, xmin, xmax)
end
# Run maximum likelihood estimation and compute AIC and BIC
#
# lpdf(w,y) = log p(y|w)
#
# w = link_model(z)   maps the whole real vector space of z into the parameter space of w
# z = unlink_model(w) is the inverse of link_model(z)
# These keep the parameter w inside its domain while the optimize() function runs.
#
# Y[i] are the sample points.
#
# chain is the chain produced by loglik, chain = loglikchainof(sim).
#
# For a univariate objective, the optimize() function no longer accepts an initial point,
# so in that case the objective is extended to a function of two variables before use.
#
function AICandBICof(lpdf, link_model, unlink_model, Y, chain)
n = length(Y)
L = size(chain,1)
nparams = size(chain,2)
negloglik(z) = -@sum(lpdf(link_model(z), Y[i]), i, 1:n)
minnegloglik_chain, l = findmin(negloglik(unlink_model(chain[l,:])) for l in 1:L)
if size(chain,2) == 1
f(z) = negloglik([z[1]]) + z[2]^2/eps()
o = optimize(f, [unlink_model(chain[l,:])[1], 0.0])
minnegloglik = o.minimum
param_AIC = link_model([o.minimizer[1]])
else
o = optimize(negloglik, unlink_model(chain[l,:]))
minnegloglik = o.minimum
param_AIC = link_model(o.minimizer)
end
T_AIC = 2.0*minnegloglik
V_AIC = 2.0*nparams
T_BIC = T_AIC
V_BIC = nparams*log(n)
AIC = T_AIC + V_AIC
BIC = T_BIC + V_BIC
return AIC, T_AIC, V_AIC,
BIC, T_BIC, V_BIC,
param_AIC
end
AICandBICof (generic function with 1 method)
function statsof(sim, Y;
dist_true=mixnormal(0.0, 0.0, 0.0),
dist_model=mixnormal, link_model=link_mixnormal, unlink_model=unlink_mixnormal)
n = length(Y)
lpdf(w, y) = logpdf(dist_model(w), y)
loglikchainof = makefunc_loglikchainof(lpdf, sort(keys(sim)), Y)
loglik, chain = loglikchainof(sim)
WAIC, T_WAIC, V_WAIC = WAICof(loglik)
LOOCV = LOOCVof(loglik)
WBIC = WBICof(loglik)
FreeEnergy = FreeEnergyof(loglik)[1]
param_Bayes = vec(mean(chain, 1))
pred_Bayes = makefunc_pdfpred(lpdf, chain)
GeneralizationLoss = 2*n*GeneralizationLossof(x->pdf(dist_true,x), pred_Bayes)[1]
AIC, T_AIC, V_AIC, BIC, T_BIC, V_BIC, param_MLE = AICandBICof(lpdf, link_model, unlink_model, Y, chain)
pred_MLE(y) = exp(lpdf(param_MLE, y))
return WAIC, T_WAIC, V_WAIC, LOOCV, GeneralizationLoss,
WBIC, FreeEnergy,
param_Bayes, pred_Bayes,
AIC, T_AIC, V_AIC,
BIC, T_BIC, V_BIC,
param_MLE, pred_MLE
end
function show_all_results(dist_true, Y, sim; statsfunc=statsof,
dist_model=mixnormal, link_model=link_mixnormal, unlink_model=unlink_mixnormal,
figsize=(6,4.2), xmin=-4.0, xmax=6.0)
WAIC, T_WAIC, V_WAIC, LOOCV, GeneralizationLoss,
WBIC, FreeEnergy,
param_Bayes, pred_Bayes,
AIC, T_AIC, V_AIC,
BIC, T_BIC, V_BIC,
param_MLE, pred_MLE = statsfunc(sim, Y, dist_true=dist_true,
dist_model=dist_model, link_model=link_model, unlink_model=unlink_model)
n = length(Y)
println("\n=== Estimates by $dist_model (n = $n) ===")
@show param_Bayes
@show param_MLE
println("--- Information Criterions")
println("* AIC = $AIC = $T_AIC + $V_AIC")
println("* GenLoss = $GeneralizationLoss")
println("* WAIC = $WAIC = $T_WAIC + $V_WAIC")
println("* LOOCV = $LOOCV")
println("---")
println("* BIC = $BIC = $T_BIC + $V_BIC")
println("* FreeEnergy = $FreeEnergy")
println("* WBIC = $WBIC")
println("="^78 * "\n")
sleep(0.1)
plt.figure(figsize=figsize)
kde_sample = makefunc_pdfkde(Y)
x = linspace(xmin, xmax, 201)
plt.plot(x, pdf.(dist_true, x), label="true distribution")
plt.scatter(Y, kde_sample.(Y), label="sample", s=10, color="k", alpha=0.5)
plt.plot(x, kde_sample.(x), label="KDE of sample", color="k", alpha=0.5)
plt.plot(x, pred_Bayes.(x), label="Bayesian predictive", ls="--")
plt.plot(x, pred_MLE.(x), label="MLE predictive", ls="-.")
plt.xlabel("x")
plt.ylabel("probability density")
plt.grid(ls=":")
plt.legend(fontsize=8)
plt.title("Estimates by $dist_model: n = $(length(Y))")
end
function plotsample(dist_true, Y; figsize=(6,4.2), xmin=-4.0, xmax=6.0)
sleep(0.1)
plt.figure(figsize=figsize)
kde_sample = makefunc_pdfkde(Y)
x = linspace(xmin, xmax, 201)
plt.plot(x, pdf.(dist_true, x), label="true dist.")
plt.scatter(Y, kde_sample.(Y), label="sample", s=10, color="k", alpha=0.5)
plt.plot(x, kde_sample.(x), label="KDE of sample", color="k", alpha=0.5)
plt.xlabel("x")
plt.ylabel("probability density")
plt.grid(ls=":")
plt.legend(fontsize=8)
plt.title("Sample size n = $(length(Y))")
end
plotsample (generic function with 1 method)
The definitions in the next cell are not used in this notebook.
# Given an array Y sampled from the dist_true distribution, fit a normal model
# by maximum likelihood and return the estimates, AIC, BIC, and the generalization loss.
#
function fit_Normal(dist_true, Y)
n = length(Y)
d = fit(Normal,Y)
mu = d.μ
sigma = d.σ
pred_Normal(y) = pdf(d,y)
T_Normal = -2*sum(logpdf.(d,Y))
V_Normal = 4.0
AIC_Normal = T_Normal + V_Normal
TB_Normal = T_Normal
VB_Normal = 2.0*log(n)
BIC_Normal = TB_Normal + VB_Normal
f(y) = -pdf(dist_true, y)*logpdf(d, y)
GL_Normal = 2*n*quadgk(f, -10, 10)[1]
return mu, sigma, pred_Normal,
AIC_Normal, T_Normal, V_Normal,
BIC_Normal, TB_Normal, VB_Normal,
GL_Normal
end
# The plotting range runs from xmin to xmax.
#
function show_fit_Normal(dist_true, Y; figsize=(6,4.2), xmin=-4.0, xmax=6.0)
mu, sigma, pred_Normal,
AIC_Normal, T_Normal, V_Normal,
BIC_Normal, TB_Normal, VB_Normal,
GL_Normal = fit_Normal(dist_true, Y)
println("--- Normal Fitting")
println("* μ = $mu")
println("* σ = $sigma")
println("* GenLoss = $GL_Normal")
println("* AIC = $AIC_Normal = $T_Normal + $V_Normal")
println("* BIC = $BIC_Normal = $TB_Normal + $VB_Normal")
sleep(0.1)
plt.figure(figsize=figsize)
kde_sample = makefunc_pdfkde(Y)
x = linspace(xmin, xmax, 201)
plt.plot(x, pdf.(dist_true, x), label="true distribution")
plt.scatter(Y, kde_sample.(Y), label="sample", s=10, color="k", alpha=0.5)
plt.plot(x, kde_sample.(x), label="KDE of sample", color="k", alpha=0.5)
plt.plot(x, pred_Normal.(x), label="Normal predictive", ls="--")
plt.xlabel("x")
plt.ylabel("probability density")
plt.grid(ls=":")
plt.legend(fontsize=8)
plt.title("Sample size n = $(length(Y))")
end
show_fit_Normal (generic function with 1 method)
show_sample = false
macro simonce(seed, n, dist_true) quote begin
srand($(esc(seed)))
dist_true = $(esc(dist_true))
n = $(esc(n))
Y = rand(dist_true, n)
println("dist_true = $dist_true")
println("n = $n")
show_sample && println("Y = $Y")
println()
iterations = 500
burnin = 100
thin = 1
chains = 4
model, data, inits = sample2model_mixnormal(Y, chains=chains)
@time sim_mix = mcmc(model, data, inits, iterations, burnin=burnin, thin=thin, chains=chains, verbose=false)
showsummary(sim_mix, show_describe=false, show_gelman=false)
show_all_results(dist_true, Y, sim_mix, xmax=4.5,
dist_model=mixnormal, link_model=link_mixnormal, unlink_model=unlink_mixnormal)
iterations = 500
burnin = 100
thin = 1
chains = 4
model, data, inits = sample2model_normal1(Y, chains=chains)
@time sim_normal1 = mcmc(model, data, inits, iterations, burnin=burnin, thin=thin, chains=chains, verbose=false)
showsummary(sim_normal1, show_describe=false, show_gelman=false)
show_all_results(dist_true, Y, sim_normal1, xmax=4.5,
dist_model=normal1, link_model=link_normal1, unlink_model=unlink_normal1)
iterations = 500
burnin = 100
thin = 1
chains = 4
model, data, inits = sample2model_normal(Y, chains=chains)
@time sim_normal = mcmc(model, data, inits, iterations, burnin=burnin, thin=thin, chains=chains, verbose=false)
showsummary(sim_normal, show_describe=false, show_gelman=false)
show_all_results(dist_true, Y, sim_normal, xmax=4.5,
dist_model=normal, link_model=link_normal, unlink_model=unlink_normal)
end end end; # macro
@simonce(4649, 2^5, Normal())
dist_true = Distributions.Normal{Float64}(μ=0.0, σ=1.0)
n = 32

 15.065452 seconds (2.95 M allocations: 163.227 MiB, 0.55% gc time)

=== Estimates by mixnormal (n = 32) ===
param_Bayes = [0.486959, 0.233942, 0.261794]
param_MLE = [0.936807, -1.19038, 0.501329]
--- Information Criteria
* AIC = 98.63118881738633 = 92.63118881738633 + 6.0
* GenLoss = 95.00645794903944
* WAIC = 95.23777068273637 = 93.032919310703 + 2.204851372033376
* LOOCV = 95.24412271029585
---
* BIC = 103.02839652578551 = 92.63118881738633 + 10.39720770839918
* FreeEnergy = 95.53777055940968
* WBIC = 96.1060899100275
==============================================================================

  6.376224 seconds (15.31 k allocations: 1.008 MiB)

=== Estimates by normal1 (n = 32) ===
param_Bayes = [0.378015]
param_MLE = [0.394422]
--- Information Criteria
* AIC = 95.13734050276199 = 93.13734050276199 + 2.0
* GenLoss = 95.26561685218313
* WAIC = 95.17694627294915 = 93.09311908752528 + 2.0838271854238655
* LOOCV = 95.17759864932783
---
* BIC = 96.60307640556171 = 93.13734050276199 + 3.4657359027997265
* FreeEnergy = 95.04674485334121
* WBIC = 95.46863326332175
==============================================================================

  0.429267 seconds (22.90 k allocations: 1.495 MiB)

=== Estimates by normal (n = 32) ===
param_Bayes = [0.387085, 1.09131]
param_MLE = [0.156444, 1.16935]
--- Information Criteria
* AIC = 99.25302370284928 = 95.25302370284928 + 4.0
* GenLoss = 95.41341947518924
* WAIC = 97.28749496835465 = 93.36300964008126 + 3.924485328273391
* LOOCV = 97.34306189082106
---
* BIC = 102.18449550844873 = 95.25302370284928 + 6.931471805599453
* FreeEnergy = 97.79522662090972
* WBIC = 99.0407046290298
==============================================================================
Only the generalization loss is smallest for the mixnormal model; every other criterion is smallest for the normal1 model. Since the generalization loss cannot be computed without peeking at the true distribution, model selection based on criteria computable from the sample alone would select the normal1 model here. All of the mixnormal, normal1, and normal models contain the true distribution, the standard normal; among them normal1 is the simplest, so if normal1 is selected, that is the most desirable outcome.
Comparison of the generalization loss:
Comparison of WAIC:
Comparison of LOOCV:
Comparison of FreeEnergy:
Comparison of WBIC:
However, the examples below show that normal1 is not always selected, and that different information criteria can select different models. It seems necessary to run a large number of such simulations and check with what probability each model gets selected. The effect of the sample size should also be examined.
@simonce(12345, 2^5, Normal())
dist_true = Distributions.Normal{Float64}(μ=0.0, σ=1.0)
n = 32

  4.026143 seconds (14.24 k allocations: 1.119 MiB)

=== Estimates by mixnormal (n = 32) ===
param_Bayes = [0.49302, 0.0985841, 0.0411164]
param_MLE = [0.892057, -0.552589, 0.204852]
--- Information Criteria
* AIC = 97.67302284048532 = 91.67302284048532 + 6.0
* GenLoss = 91.44613990292284
* WAIC = 94.02773660791163 = 91.901516462495 + 2.12622014541664
* LOOCV = 94.02576431316388
---
* BIC = 102.0702305488845 = 91.67302284048532 + 10.39720770839918
* FreeEnergy = 93.85677931131505
* WBIC = 94.25387919758585
==============================================================================

  0.202093 seconds (9.21 k allocations: 663.580 KiB)

=== Estimates by normal1 (n = 32) ===
param_Bayes = [0.119817]
param_MLE = [0.123087]
--- Information Criteria
* AIC = 93.71347690163627 = 91.71347690163627 + 2.0
* GenLoss = 91.27194629270797
* WAIC = 93.71977666899444 = 91.70285577140132 + 2.016920897593122
* LOOCV = 93.72522172318688
---
* BIC = 95.17921280443599 = 91.71347690163627 + 3.4657359027997265
* FreeEnergy = 94.40939895824953
* WBIC = 95.15435527922597
==============================================================================

  0.456002 seconds (11.71 k allocations: 871.243 KiB)

=== Estimates by normal (n = 32) ===
param_Bayes = [0.121982, 1.06473]
param_MLE = [0.0504263, 1.05172]
--- Information Criteria
* AIC = 95.93714128495486 = 91.93714128495486 + 4.0
* GenLoss = 91.63591250022913
* WAIC = 95.31922516920145 = 92.18702073102362 + 3.132204438177829
* LOOCV = 95.33831326867444
---
* BIC = 98.86861309055432 = 91.93714128495486 + 6.931471805599453
* FreeEnergy = 95.2924737239145
* WBIC = 96.04013823122672
==============================================================================
@simonce(2017, 2^5, Normal())
dist_true = Distributions.Normal{Float64}(μ=0.0, σ=1.0)
n = 32

  3.288325 seconds (12.21 k allocations: 981.135 KiB)

=== Estimates by mixnormal (n = 32) ===
param_Bayes = [0.502104, 0.0809364, 0.0606336]
param_MLE = [0.358857, 0.0800281, 0.0800236]
--- Information Criteria
* AIC = 94.17263427340721 = 88.17263427340721 + 6.0
* GenLoss = 91.24235945134691
* WAIC = 90.46749077316983 = 88.663595913717 + 1.8038948594528188
* LOOCV = 90.46980728728674
---
* BIC = 98.56984198180639 = 88.17263427340721 + 10.39720770839918
* FreeEnergy = 90.92634980215496
* WBIC = 91.50556783702432
==============================================================================

  0.156125 seconds (8.22 k allocations: 607.611 KiB)

=== Estimates by normal1 (n = 32) ===
param_Bayes = [0.0774779]
param_MLE = [0.0800426]
--- Information Criteria
* AIC = 90.17263426713568 = 88.17263426713568 + 2.0
* GenLoss = 91.01270897388196
* WAIC = 90.0791221054582 = 88.26489108047711 + 1.8142310249810836
* LOOCV = 90.07967043721617
---
* BIC = 91.6383701699354 = 88.17263426713568 + 3.4657359027997265
* FreeEnergy = 90.2207133060381
* WBIC = 90.64834385833284
==============================================================================

  0.288153 seconds (10.21 k allocations: 738.540 KiB)

=== Estimates by normal (n = 32) ===
param_Bayes = [0.0780814, 1.01011]
param_MLE = [0.00125602, 1.00126]
--- Information Criteria
* AIC = 92.37749924942932 = 88.37749924942932 + 4.0
* GenLoss = 91.09899559264029
* WAIC = 92.03599017369528 = 88.54809434373537 + 3.487895829959906
* LOOCV = 92.08031828775027
---
* BIC = 95.30897105502878 = 88.37749924942932 + 6.931471805599453
* FreeEnergy = 93.164766666965
* WBIC = 94.64303093819963
==============================================================================
# Try sample size 512.
# A bug in the E2nLn function used by the FreeEnergyof function has been fixed.
# The previous version raised a DomainError for n = 2^9.
# See line 86 of C:/JuliaPkg/v0.6/QuadGK/src/QuadGK.jl.
# The following tests whether the fix works.
@simonce(4649, 2^9, Normal())
dist_true = Distributions.Normal{Float64}(μ=0.0, σ=1.0)
n = 512

 83.905611 seconds (13.85 k allocations: 1.057 MiB)

=== Estimates by mixnormal (n = 512) ===
param_Bayes = [0.504444, -0.0770103, -0.0820722]
param_MLE = [0.0358453, 0.119316, -1.48473]
--- Information Criteria
* AIC = 1488.8000964483801 = 1482.8000964483801 + 6.0
* GenLoss = 1455.8151387230396
* WAIC = 1488.2792651021853 = 1484.1529909891415 + 4.126274113043877
* LOOCV = 1488.2990615737433
---
* BIC = 1501.5150703234988 = 1482.8000964483801 + 18.714973875118524
* FreeEnergy = 1488.2200912360272
* WBIC = 1490.1367694232738
==============================================================================

  0.176935 seconds (7.04 k allocations: 605.049 KiB)

=== Estimates by normal1 (n = 512) ===
param_Bayes = [0.0614389]
param_MLE = [0.0618234]
--- Information Criteria
* AIC = 1488.0979965449499 = 1486.0979965449499 + 2.0
* GenLoss = 1454.922867779272
* WAIC = 1488.2394570473878 = 1486.0323169765365 + 2.207140070851128
* LOOCV = 1488.2392399410628
---
* BIC = 1492.3363211699893 = 1486.0979965449499 + 6.238324625039508
* FreeEnergy = 1487.9213511156827
* WBIC = 1488.606057692652
==============================================================================

  0.380425 seconds (9.72 k allocations: 758.321 KiB)

=== Estimates by normal (n = 512) ===
param_Bayes = [0.0620504, 1.03635]
param_MLE = [0.0411424, 1.042]
--- Information Criteria
* AIC = 1489.3713601332472 = 1485.3713601332472 + 4.0
* GenLoss = 1456.1971874635794
* WAIC = 1488.9875924881787 = 1485.068147403419 + 3.919445084759657
* LOOCV = 1488.9906525403153
---
* BIC = 1497.8480093833261 = 1485.3713601332472 + 12.476649250079015
* FreeEnergy = 1489.7152109234255
* WBIC = 1492.5401277876256
==============================================================================
function InformationCriterions(dist_true, Y, sim; dist_model=mixnormal)
n = length(Y)
lpdf(w, y) = logpdf(dist_model(w), y)
loglikchainof = makefunc_loglikchainof(lpdf, sort(keys(sim)), Y)
loglik, chain = loglikchainof(sim)
WAIC, T_WAIC, V_WAIC = WAICof(loglik)
LOOCV = LOOCVof(loglik)
pred_Bayes = makefunc_pdfpred(lpdf, chain)
GL = 2*n*GeneralizationLossof(x->pdf(dist_true,x), pred_Bayes)[1]
WBIC = WBICof(loglik)
FreeEnergy = FreeEnergyof(loglik)[1]
return chain, WAIC, T_WAIC, V_WAIC, LOOCV, GL, WBIC, FreeEnergy
end
InformationCriterions (generic function with 1 method)
The computation results are saved to files.
If you download the data files (from the URL given at the top of this notebook) and place them in the same directory as this notebook, the cells in this section need not be executed.
dataname = "DataStandardNormalSample8Case4649"
seed = 4649
Nsims = 1000
dist_true = Normal()
n = 2^3
##########
Shannon = 2*n*entropy(dist_true)
iterations_mixnormal = 500
burnin_mixnormal = 100
thin_mixnormal = 1
chains_mixnormal = 4
iterations_normal1 = 500
burnin_normal1 = 100
thin_normal1 = 1
chains_normal1 = 4
iterations_normal = 500
burnin_normal = 100
thin_normal = 1
chains_normal = 4
N = Nsims
Sample = Array{Array{Float64}}(N)
T_true = Array{Float64}(N)
Chain_mixnormal = Array{Array{Float64,2}}(N)
WAIC_mixnormal = Array{Float64}(N)
T_mixnormal = Array{Float64}(N)
V_mixnormal = Array{Float64}(N)
LOOCV_mixnormal = Array{Float64}(N)
GL_mixnormal = Array{Float64}(N)
WBIC_mixnormal = Array{Float64}(N)
FreeEnergy_mixnormal = Array{Float64}(N)
Chain_normal1 = Array{Array{Float64,2}}(N)
WAIC_normal1 = Array{Float64}(N)
T_normal1 = Array{Float64}(N)
V_normal1 = Array{Float64}(N)
LOOCV_normal1 = Array{Float64}(N)
GL_normal1 = Array{Float64}(N)
WBIC_normal1 = Array{Float64}(N)
FreeEnergy_normal1 = Array{Float64}(N)
Chain_normal = Array{Array{Float64,2}}(N)
WAIC_normal = Array{Float64}(N)
T_normal = Array{Float64}(N)
V_normal = Array{Float64}(N)
LOOCV_normal = Array{Float64}(N)
GL_normal = Array{Float64}(N)
WBIC_normal = Array{Float64}(N)
FreeEnergy_normal = Array{Float64}(N)
@time for i in 1:N
Y = rand(dist_true, n)
Sample[i] = Y
T_true[i] = -2*@sum(logpdf(dist_true, Y[k]), k, 1:n)
# mixnormal
#
model_mixnormal, data_mixnormal, inits_mixnormal = sample2model_mixnormal(Y, chains=chains_mixnormal)
sim_mixnormal = mcmc(model_mixnormal, data_mixnormal, inits_mixnormal,
iterations_mixnormal, burnin=burnin_mixnormal, thin=thin_mixnormal, chains=chains_mixnormal, verbose=false)
Chain_mixnormal[i], WAIC_mixnormal[i], T_mixnormal[i], V_mixnormal[i],
LOOCV_mixnormal[i], GL_mixnormal[i], WBIC_mixnormal[i], FreeEnergy_mixnormal[i] =
InformationCriterions(dist_true, Y, sim_mixnormal, dist_model=mixnormal)
# normal1
#
model_normal1, data_normal1, inits_normal1 = sample2model_normal1(Y, chains=chains_normal1)
sim_normal1 = mcmc(model_normal1, data_normal1, inits_normal1,
iterations_normal1, burnin=burnin_normal1, thin=thin_normal1, chains=chains_normal1, verbose=false)
Chain_normal1[i], WAIC_normal1[i], T_normal1[i], V_normal1[i],
LOOCV_normal1[i], GL_normal1[i], WBIC_normal1[i], FreeEnergy_normal1[i] =
InformationCriterions(dist_true, Y, sim_normal1, dist_model=normal1)
# normal
#
model_normal, data_normal, inits_normal = sample2model_normal(Y, chains=chains_normal)
sim_normal = mcmc(model_normal, data_normal, inits_normal,
iterations_normal, burnin=burnin_normal, thin=thin_normal, chains=chains_normal, verbose=false)
Chain_normal[i], WAIC_normal[i], T_normal[i], V_normal[i],
LOOCV_normal[i], GL_normal[i], WBIC_normal[i], FreeEnergy_normal[i] =
InformationCriterions(dist_true, Y, sim_normal, dist_model=normal)
print("$i ")
i == N && println()
end
#########
varnames = [
"seed", "dist_true", "n", "Shannon",
"iterations_mixnormal", "burnin_mixnormal", "thin_mixnormal", "chains_mixnormal",
"iterations_normal1", "burnin_normal1", "thin_normal1", "chains_normal1",
"iterations_normal", "burnin_normal", "thin_normal", "chains_normal",
"Nsims", "Sample", "T_true",
"Chain_mixnormal", "WAIC_mixnormal", "T_mixnormal", "V_mixnormal",
"LOOCV_mixnormal", "GL_mixnormal", "WBIC_mixnormal", "FreeEnergy_mixnormal",
"Chain_normal1", "WAIC_normal1", "T_normal1", "V_normal1",
"LOOCV_normal1", "GL_normal1", "WBIC_normal1", "FreeEnergy_normal1",
"Chain_normal", "WAIC_normal", "T_normal", "V_normal",
"LOOCV_normal", "GL_normal", "WBIC_normal", "FreeEnergy_normal",
]
eval(parse("$dataname = Dict()"))
for v in sort(varnames)
eval(parse("$dataname[\"$v\"] = $v"))
end
@show jld2file = "$dataname.jld2"
eval(parse("save(jld2file, $dataname)"))
data = load("$dataname.jld2")
@show keys(data);
1 2 3 ... 998 999 1000
2100.283449 seconds (14.62 G allocations: 327.065 GiB, 6.02% gc time)
jld2file = "$(dataname).jld2" = "DataStandardNormalSample8Case4649.jld2"
keys(data) = String["Sample", "T_mixnormal", "GL_normal1", "WBIC_normal1", "thin_mixnormal", "burnin_normal", "WAIC_normal1", "FreeEnergy_normal1", "LOOCV_normal1", "T_true", "T_normal", "T_normal1", "Nsims", "thin_normal", "Chain_normal1", "V_normal1", "V_normal", "WBIC_mixnormal", "LOOCV_normal", "GL_normal", "thin_normal1", "chains_normal", "n", "Chain_mixnormal", "chains_mixnormal", "Shannon", "burnin_mixnormal", "iterations_normal", "chains_normal1", "GL_mixnormal", "FreeEnergy_mixnormal", "WBIC_normal", "burnin_normal1", "WAIC_mixnormal", "iterations_normal1", "dist_true", "Chain_normal", "iterations_mixnormal", "FreeEnergy_normal", "V_mixnormal", "WAIC_normal", "LOOCV_mixnormal", "seed"]
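Aside: the eval(parse(...)) idiom above (also used in the two analogous cells below) can be avoided. A sketch of the same save using getfield on Main, assuming all names in varnames are globals of Main:

d = Dict{String,Any}(v => getfield(Main, Symbol(v)) for v in varnames)
save("$dataname.jld2", d)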
dataname = "DataStandardNormalSample32Case4649"
seed = 4649
Nsims = 1000
dist_true = Normal()
n = 2^5
##########
Shannon = 2*n*entropy(dist_true)
iterations_mixnormal = 500
burnin_mixnormal = 100
thin_mixnormal = 1
chains_mixnormal = 4
iterations_normal1 = 500
burnin_normal1 = 100
thin_normal1 = 1
chains_normal1 = 4
iterations_normal = 500
burnin_normal = 100
thin_normal = 1
chains_normal = 4
N = Nsims
Sample = Array{Array{Float64}}(N)
T_true = Array{Float64}(N)
Chain_mixnormal = Array{Array{Float64,2}}(N)
WAIC_mixnormal = Array{Float64}(N)
T_mixnormal = Array{Float64}(N)
V_mixnormal = Array{Float64}(N)
LOOCV_mixnormal = Array{Float64}(N)
GL_mixnormal = Array{Float64}(N)
WBIC_mixnormal = Array{Float64}(N)
FreeEnergy_mixnormal = Array{Float64}(N)
Chain_normal1 = Array{Array{Float64,2}}(N)
WAIC_normal1 = Array{Float64}(N)
T_normal1 = Array{Float64}(N)
V_normal1 = Array{Float64}(N)
LOOCV_normal1 = Array{Float64}(N)
GL_normal1 = Array{Float64}(N)
WBIC_normal1 = Array{Float64}(N)
FreeEnergy_normal1 = Array{Float64}(N)
Chain_normal = Array{Array{Float64,2}}(N)
WAIC_normal = Array{Float64}(N)
T_normal = Array{Float64}(N)
V_normal = Array{Float64}(N)
LOOCV_normal = Array{Float64}(N)
GL_normal = Array{Float64}(N)
WBIC_normal = Array{Float64}(N)
FreeEnergy_normal = Array{Float64}(N)
@time for i in 1:N
Y = rand(dist_true, n)
Sample[i] = Y
T_true[i] = -2*@sum(logpdf(dist_true, Y[k]), k, 1:n)
# mixnormal
#
model_mixnormal, data_mixnormal, inits_mixnormal = sample2model_mixnormal(Y, chains=chains_mixnormal)
sim_mixnormal = mcmc(model_mixnormal, data_mixnormal, inits_mixnormal,
iterations_mixnormal, burnin=burnin_mixnormal, thin=thin_mixnormal, chains=chains_mixnormal, verbose=false)
Chain_mixnormal[i], WAIC_mixnormal[i], T_mixnormal[i], V_mixnormal[i],
LOOCV_mixnormal[i], GL_mixnormal[i], WBIC_mixnormal[i], FreeEnergy_mixnormal[i] =
InformationCriterions(dist_true, Y, sim_mixnormal, dist_model=mixnormal)
# normal1
#
model_normal1, data_normal1, inits_normal1 = sample2model_normal1(Y, chains=chains_normal1)
sim_normal1 = mcmc(model_normal1, data_normal1, inits_normal1,
iterations_normal1, burnin=burnin_normal1, thin=thin_normal1, chains=chains_normal1, verbose=false)
Chain_normal1[i], WAIC_normal1[i], T_normal1[i], V_normal1[i],
LOOCV_normal1[i], GL_normal1[i], WBIC_normal1[i], FreeEnergy_normal1[i] =
InformationCriterions(dist_true, Y, sim_normal1, dist_model=normal1)
# normal
#
model_normal, data_normal, inits_normal = sample2model_normal(Y, chains=chains_normal)
sim_normal = mcmc(model_normal, data_normal, inits_normal,
iterations_normal, burnin=burnin_normal, thin=thin_normal, chains=chains_normal, verbose=false)
Chain_normal[i], WAIC_normal[i], T_normal[i], V_normal[i],
LOOCV_normal[i], GL_normal[i], WBIC_normal[i], FreeEnergy_normal[i] =
InformationCriterions(dist_true, Y, sim_normal, dist_model=normal)
print("$i ")
i == N && println()
end
#########
varnames = [
"seed", "dist_true", "n", "Shannon",
"iterations_mixnormal", "burnin_mixnormal", "thin_mixnormal", "chains_mixnormal",
"iterations_normal1", "burnin_normal1", "thin_normal1", "chains_normal1",
"iterations_normal", "burnin_normal", "thin_normal", "chains_normal",
"Nsims", "Sample", "T_true",
"Chain_mixnormal", "WAIC_mixnormal", "T_mixnormal", "V_mixnormal",
"LOOCV_mixnormal", "GL_mixnormal", "WBIC_mixnormal", "FreeEnergy_mixnormal",
"Chain_normal1", "WAIC_normal1", "T_normal1", "V_normal1",
"LOOCV_normal1", "GL_normal1", "WBIC_normal1", "FreeEnergy_normal1",
"Chain_normal", "WAIC_normal", "T_normal", "V_normal",
"LOOCV_normal", "GL_normal", "WBIC_normal", "FreeEnergy_normal",
]
eval(parse("$dataname = Dict()"))
for v in sort(varnames)
eval(parse("$dataname[\"$v\"] = $v"))
end
@show jld2file = "$dataname.jld2"
eval(parse("save(jld2file, $dataname)"))
data = load("$dataname.jld2")
@show keys(data);
1 2 3 ... 998 999 1000
4575.987428 seconds (17.00 G allocations: 383.087 GiB, 5.16% gc time)
jld2file = "$(dataname).jld2" = "DataStandardNormalSample32Case4649.jld2"
keys(data) = String["Sample", "T_mixnormal", "GL_normal1", "WBIC_normal1", "thin_mixnormal", "burnin_normal", "WAIC_normal1", "FreeEnergy_normal1", "LOOCV_normal1", "T_true", "T_normal", "T_normal1", "Nsims", "thin_normal", "Chain_normal1", "V_normal1", "V_normal", "WBIC_mixnormal", "LOOCV_normal", "GL_normal", "thin_normal1", "chains_normal", "n", "Chain_mixnormal", "chains_mixnormal", "Shannon", "burnin_mixnormal", "iterations_normal", "chains_normal1", "GL_mixnormal", "FreeEnergy_mixnormal", "WBIC_normal", "burnin_normal1", "WAIC_mixnormal", "iterations_normal1", "dist_true", "Chain_normal", "iterations_mixnormal", "FreeEnergy_normal", "V_mixnormal", "WAIC_normal", "LOOCV_mixnormal", "seed"]
dataname = "DataStandardNormalSample128Case4649"
seed = 4649
Nsims = 1000
dist_true = Normal()
n = 2^7
##########
Shannon = 2*n*entropy(dist_true)
iterations_mixnormal = 500
burnin_mixnormal = 100
thin_mixnormal = 1
chains_mixnormal = 4
iterations_normal1 = 500
burnin_normal1 = 100
thin_normal1 = 1
chains_normal1 = 4
iterations_normal = 500
burnin_normal = 100
thin_normal = 1
chains_normal = 4
N = Nsims
Sample = Array{Array{Float64}}(N)
T_true = Array{Float64}(N)
Chain_mixnormal = Array{Array{Float64,2}}(N)
WAIC_mixnormal = Array{Float64}(N)
T_mixnormal = Array{Float64}(N)
V_mixnormal = Array{Float64}(N)
LOOCV_mixnormal = Array{Float64}(N)
GL_mixnormal = Array{Float64}(N)
WBIC_mixnormal = Array{Float64}(N)
FreeEnergy_mixnormal = Array{Float64}(N)
Chain_normal1 = Array{Array{Float64,2}}(N)
WAIC_normal1 = Array{Float64}(N)
T_normal1 = Array{Float64}(N)
V_normal1 = Array{Float64}(N)
LOOCV_normal1 = Array{Float64}(N)
GL_normal1 = Array{Float64}(N)
WBIC_normal1 = Array{Float64}(N)
FreeEnergy_normal1 = Array{Float64}(N)
Chain_normal = Array{Array{Float64,2}}(N)
WAIC_normal = Array{Float64}(N)
T_normal = Array{Float64}(N)
V_normal = Array{Float64}(N)
LOOCV_normal = Array{Float64}(N)
GL_normal = Array{Float64}(N)
WBIC_normal = Array{Float64}(N)
FreeEnergy_normal = Array{Float64}(N)
@time for i in 1:N
Y = rand(dist_true, n)
Sample[i] = Y
T_true[i] = -2*@sum(logpdf(dist_true, Y[k]), k, 1:n)
# mixnormal
#
model_mixnormal, data_mixnormal, inits_mixnormal = sample2model_mixnormal(Y, chains=chains_mixnormal)
sim_mixnormal = mcmc(model_mixnormal, data_mixnormal, inits_mixnormal,
iterations_mixnormal, burnin=burnin_mixnormal, thin=thin_mixnormal, chains=chains_mixnormal, verbose=false)
Chain_mixnormal[i], WAIC_mixnormal[i], T_mixnormal[i], V_mixnormal[i],
LOOCV_mixnormal[i], GL_mixnormal[i], WBIC_mixnormal[i], FreeEnergy_mixnormal[i] =
InformationCriterions(dist_true, Y, sim_mixnormal, dist_model=mixnormal)
# normal1
#
model_normal1, data_normal1, inits_normal1 = sample2model_normal1(Y, chains=chains_normal1)
sim_normal1 = mcmc(model_normal1, data_normal1, inits_normal1,
iterations_normal1, burnin=burnin_normal1, thin=thin_normal1, chains=chains_normal1, verbose=false)
Chain_normal1[i], WAIC_normal1[i], T_normal1[i], V_normal1[i],
LOOCV_normal1[i], GL_normal1[i], WBIC_normal1[i], FreeEnergy_normal1[i] =
InformationCriterions(dist_true, Y, sim_normal1, dist_model=normal1)
# normal
#
model_normal, data_normal, inits_normal = sample2model_normal(Y, chains=chains_normal)
sim_normal = mcmc(model_normal, data_normal, inits_normal,
iterations_normal, burnin=burnin_normal, thin=thin_normal, chains=chains_normal, verbose=false)
Chain_normal[i], WAIC_normal[i], T_normal[i], V_normal[i],
LOOCV_normal[i], GL_normal[i], WBIC_normal[i], FreeEnergy_normal[i] =
InformationCriterions(dist_true, Y, sim_normal, dist_model=normal)
print("$i ")
i == N && println()
end
#########
varnames = [
"seed", "dist_true", "n", "Shannon",
"iterations_mixnormal", "burnin_mixnormal", "thin_mixnormal", "chains_mixnormal",
"iterations_normal1", "burnin_normal1", "thin_normal1", "chains_normal1",
"iterations_normal", "burnin_normal", "thin_normal", "chains_normal",
"Nsims", "Sample", "T_true",
"Chain_mixnormal", "WAIC_mixnormal", "T_mixnormal", "V_mixnormal",
"LOOCV_mixnormal", "GL_mixnormal", "WBIC_mixnormal", "FreeEnergy_mixnormal",
"Chain_normal1", "WAIC_normal1", "T_normal1", "V_normal1",
"LOOCV_normal1", "GL_normal1", "WBIC_normal1", "FreeEnergy_normal1",
"Chain_normal", "WAIC_normal", "T_normal", "V_normal",
"LOOCV_normal", "GL_normal", "WBIC_normal", "FreeEnergy_normal",
]
eval(parse("$dataname = Dict()"))
for v in sort(varnames)
eval(parse("$dataname[\"$v\"] = $v"))
end
@show jld2file = "$dataname.jld2"
eval(parse("save(jld2file, $dataname)"))
data = load("$dataname.jld2")
@show keys(data);
1 2 3 ... 998 999 1000
17368.393732 seconds (26.69 G allocations: 614.388 GiB, 2.75% gc time)
jld2file = "$(dataname).jld2" = "DataStandardNormalSample128Case4649.jld2"
keys(data) = String["Sample", "T_mixnormal", "GL_normal1", "WBIC_normal1", "thin_mixnormal", "burnin_normal", "WAIC_normal1", "FreeEnergy_normal1", "LOOCV_normal1", "T_true", "T_normal", "T_normal1", "Nsims", "thin_normal", "Chain_normal1", "V_normal1", "V_normal", "WBIC_mixnormal", "LOOCV_normal", "GL_normal", "thin_normal1", "chains_normal", "n", "Chain_mixnormal", "chains_mixnormal", "Shannon", "burnin_mixnormal", "iterations_normal", "chains_normal1", "GL_mixnormal", "FreeEnergy_mixnormal", "WBIC_normal", "burnin_normal1", "WAIC_mixnormal", "iterations_normal1", "dist_true", "Chain_normal", "iterations_mixnormal", "FreeEnergy_normal", "V_mixnormal", "WAIC_normal", "LOOCV_mixnormal", "seed"]
#data = load("DataStandardNormalSample8Case4649.jld2")
#data = load("DataStandardNormalSample32Case4649.jld2")
data = load("DataStandardNormalSample128Case4649.jld2")
# Bind each value stored in the JLD2 file to a global variable of the same name.
for v in sort(collect(keys(data)))
ex = parse("$v = data[\"$v\"]")
println(ex)
eval(ex)
end
(output: one assignment expression per key, from Chain_mixnormal = data["Chain_mixnormal"] through thin_normal1 = data["thin_normal1"])
@show Shannon
@show n*log(2pi) + n
Shannon = 363.2482645003962
n * log(2pi) + n = 363.2482645003962
363.2482645003962
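As a cross-check (a minimal sketch added here, using only the Distributions and QuadGK packages loaded at the top of this notebook): the value of Shannon can also be reproduced by numerical integration, since $\mathrm{S} = -2n\int q(x)\log q(x)\,dx = n\log(2\pi) + n$ for $q = N(0,1)$.
# Minimal cross-check sketch: reproduce Shannon = n*log(2π) + n numerically.
q = Normal(0, 1)
S_numerical = -2n * quadgk(x -> pdf(q, x) * logpdf(q, x), -10, 10)[1]
@show S_numerical        # ≈ 363.2482645… for n = 128
@show n * log(2pi) + n   # exact value on the same scale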
Write $X_1,\ldots,X_n$ for a sample generated independently from a probability distribution $q(x)$, $p(x|w)$ for the probability model used for estimation, and $p^*(x)$ for the predictive distribution. Every scale adopted below is matched to the traditional AIC scale (the scale of chi-squared tests of log-likelihood ratios).
$$ \begin{aligned} & \mathrm{S} = \mathrm{Shannon} = -2n\int q(x)\log q(x)\,dx, \\ & \mathrm{GL} = \text{Generalization Loss} = -2n\int q(x)\log p^*(x)\,dx, \\ & \mathrm{KL} = \text{Kullback-Leibler} = \mathrm{GL} - \mathrm{S} = 2n\int q(x)\log\frac{q(x)}{p^*(x)}\,dx, \\ & \mathrm{T}_\mathrm{true} = -2\sum_{k=1}^n \log q(X_k), \\ & \mathrm{T} = -2\sum_{k=1}^n \log p^*(X_k), \\ & \mathrm{TT} = \mathrm{T} - \mathrm{T}_\mathrm{true} = 2\sum_{k=1}^n\log\frac{q(X_k)}{p^*(X_k)}, \\ & \mathrm{V} = 2\sum_{k=1}^n (\text{posterior variance of}\ \log p(X_k|w)), \\ & \mathrm{WAIC} = \mathrm{T} + \mathrm{V}, \\ & \mathrm{WT} = \mathrm{WAIC} - \mathrm{T}_\mathrm{true} = \mathrm{TT} + \mathrm{V}, \\ & \mathrm{KW} = \mathrm{KL} + \mathrm{WT} = (\mathrm{GL} - \mathrm{S}) + (\mathrm{WAIC} - \mathrm{T}). \end{aligned} $$Note that the last quantity, $\mathrm{KW}$, is defined as a sum, not a difference.
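To make these definitions concrete, here is a minimal sketch (my addition, not the implementation used in this notebook) that computes $\mathrm{T}$, $\mathrm{V}$, and $\mathrm{WAIC}$ from a hypothetical $n\times L$ matrix lp whose entry lp[k,l] equals $\log p(X_k|w_l)$ for posterior draws $w_1,\ldots,w_L$:
# Sketch only: lp is a hypothetical n×L matrix with lp[k,l] = log p(X_k|w_l).
function waic_from_loglik(lp::AbstractMatrix)
    n = size(lp, 1)
    T = 0.0
    V = 0.0
    for k in 1:n
        m = maximum(lp[k,:])                           # shift for numerical stability
        T += -2 * (m + log(mean(exp.(lp[k,:] .- m))))  # -2 log(posterior mean of p(X_k|w))
        V += 2 * var(lp[k,:])                          # 2 × posterior variance of log p(X_k|w)
    end
    return T + V  # WAIC = T + V on the AIC (deviance) scale
end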
In the notation and scale adopted above, the case $\beta=1$ of Theorem 12 (p.114) in Chapter 4 of Sumio Watanabe's book 『ベイズ統計の理論と方法』 (Theory and Methods of Bayesian Statistics) can be rewritten as follows:
$$ \begin{aligned} & \mathrm{GL} = \mathrm{S} + 2\lambda + \langle\sqrt{t}\;\xi_n(u)\rangle - V(\xi_n) + o_p(1), \\ & \mathrm{T} = \mathrm{S} + 2\lambda - \langle\sqrt{t}\;\xi_n(u)\rangle - V(\xi_n) + o_p(1). \end{aligned} $$Hence
$$ \mathrm{GL} = \mathrm{T} + 2\langle\sqrt{t}\;\xi_n(u)\rangle + o_p(1). $$By Lemma 23 of the same chapter (p.115),
$$ E_\xi\left[\langle\sqrt{t}\;\xi(u)\rangle\right] = E_\xi[V(\xi)], $$and our $\mathrm{V}$ is asymptotically equivalent to $2V(\xi_n)$ (the $V_n$ of Definition 22 (p.117) of the same chapter coincides with our $\mathrm{V}/2$). Writing $E[\;\;]$ for the expectation over realizations of the sample, we get
$$ E\left[\langle\sqrt{t}\;\xi_n(u)\rangle - V(\xi_n)\right] = o(1), \quad E\left[\langle\sqrt{t}\;\xi_n(u)\rangle + V(\xi_n)\right] = E[\mathrm{V}] + o(1), \quad E[\mathrm{GL}] = E[\mathrm{T} + \mathrm{V}] + o(1). $$Moreover, by Lemma 24 of the same chapter (p.118), note that
$$ E[\mathrm{V}] = 4\nu + o(1). $$The case $\beta=1$ of Theorem 15 of the same chapter (p.119) can be rewritten as follows:
$$ \mathrm{KW} = \mathrm{KL} + \mathrm{WT} = (\mathrm{GL} - \mathrm{S}) + (\mathrm{WAIC} - \mathrm{T})= 4\lambda + o_p(1). $$Note that this formula holds before taking any expectation; it shows that the fluctuation of $\mathrm{KL}+\mathrm{WT}$ converges to 0 as $n\to\infty$.
The $\lambda$ and $\nu$ above are called the real log canonical threshold and the singular fluctuation, respectively; they are defined on p.108 and p.117 of the same book.
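As a sanity check on the simulation results below (my addition, using the standard fact that $\lambda = \nu = d/2$ holds for a regular model with $d$ parameters), on the scale of this note one expects
$$ E[\mathrm{KW}] = 4\lambda \approx 2d, \qquad E[\mathrm{V}] = 4\nu \approx 2d $$for regular models: roughly $2$ for normal1 ($d=1$) and $4$ for normal ($d=2$). The mixnormal model ($d=3$) is singular at the true distribution $N(0,1)$, so its values should fall well below $2d=6$; the tables below (for example, mean KW ≈ 1.97, 3.98, 2.63 for normal1, normal, mixnormal at n = 128) are consistent with this.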
The definitions of $\mathrm{LOOCV}$, $\mathrm{WBIC}$, and $\mathrm{FreeEnergy}$ in this note are as follows:
$$ \begin{aligned} \mathrm{LOOCV} &= -2\sum_{k=1}^n \log\left( \text{posterior mean of}\ p(X_k|w)\ \text{for the sample}\ X_1,\ldots,\widehat X_k,\ldots,X_n\right) \\ & = 2\sum_{k=1}^n \log\left( \text{posterior mean of}\ \frac{1}{p(X_k|w)} \right), \\ \mathrm{WBIC} &= 2\left(\text{posterior mean of}\ -\sum_{k=1}^n \log p(X_k|w) \ \text{at the inverse temperature}\ \beta=\frac{1}{\log n}\right), \\ \mathrm{FreeEnergy} &= -2\log Z_n = 2\int_0^1 \left( \text{posterior mean of}\ -\sum_{k=1}^n \log p(X_k|w) \ \text{at the inverse temperature}\ \beta \right)\,d\beta. \end{aligned} $$Here $\widehat X_k$ means that $X_k$ is removed from the sample.
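The second expression for $\mathrm{LOOCV}$ is an importance-sampling identity: the leave-one-out predictive value can be estimated from the full-sample posterior alone. A minimal sketch (my addition), with the same hypothetical matrix lp as above:
# Sketch only: LOOCV = 2 Σ_k log(posterior mean of 1/p(X_k|w)).
function loocv_from_loglik(lp::AbstractMatrix)
    n = size(lp, 1)
    return 2 * sum(log(mean(exp.(-lp[k,:]))) for k in 1:n)
end
Note that the implicit weights $1/p(X_k|w)$ can be heavy-tailed, so this naive estimator may be noisy; the skewed-importance-weight mechanism is the same one behind the WBIC warning below.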
Note that the posterior mean at inverse temperature $\beta$ is, in exact arithmetic, computable from means over the ordinary posterior at inverse temperature $\beta=1$:
$$ \begin{aligned} & \left( \text{posterior mean of}\ f(w) \ \text{at the inverse temperature}\ \beta \right) = \frac{N(\beta)}{D(\beta)}, \\& N(\beta) = \left(\text{posterior mean of}\ f(w)\left(\prod_{k=1}^n p(X_k|w)\right)^{\beta-1}\right), \\ & D(\beta) = \left(\text{posterior mean of}\ \left(\prod_{k=1}^n p(X_k|w)\right)^{\beta-1}\right). \end{aligned} $$Warning (2018-01-13): As already stated in the warning added near the top of this notebook, computing WBIC from posterior samples drawn at inverse temperature β=1 produces large errors and is best avoided. In hindsight this is obvious, but I did not notice it until I actually tried it.
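In code, the reweighting above reads as follows (a minimal sketch, my addition, again for the hypothetical matrix lp). This is precisely the route the warning advises against for WBIC: with $\beta = 1/\log n$ the weights $\left(\prod_k p(X_k|w)\right)^{\beta-1}$ become extremely skewed, so a few posterior draws dominate the averages.
# Sketch only: β-posterior mean of -Σ_k log p(X_k|w) by reweighting β=1 draws.
# Weights: (Π_k p(X_k|w))^(β-1) = exp((β-1)·loglik) — badly skewed for β = 1/log(n).
function wbic_by_reweighting(lp::AbstractMatrix)
    n = size(lp, 1)
    β = 1 / log(n)
    loglik = vec(sum(lp, 1))          # Σ_k lp[k,l] for each posterior draw l
    logw = (β - 1) .* loglik          # log importance weights
    logw = logw .- maximum(logw)      # shift before exponentiating, for stability
    w = exp.(logw)
    return 2 * sum(w .* (-loglik)) / sum(w)  # 2 × weighted posterior mean
end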
# Helper: e.g. s(:WAIC, "normal") looks up the global variable WAIC_normal.
s(symbol, suffix) = eval(Symbol(symbol, :_, suffix))
function plotTh12(modelname::String; bins=25)
GL = s(:GL, modelname)
T = s(:T, modelname)
V = s(:V, modelname)
WAIC = s(:WAIC, modelname)
LOOCV = s(:LOOCV, modelname)
WBIC = s(:WBIC, modelname)
FreeEnergy = s(:FreeEnergy, modelname)
WG = WAIC - GL
LG = LOOCV - GL
LW = LOOCV - WAIC
KL = GL - Shannon
TT = T - T_true
WT = WAIC - T_true
KW = KL + WT
println("\n========== $modelname model (n = $n)")
println("2λ (std) = $(mean(KW)/2) ($(std(KW)/2))")
println("2ν (std) = $(mean(V)/2) ($(std(V)/2))")
println("="^70)
println("Name | mean (std)")
println("-"^70)
println("T | $(mean(T)) ($(std(T)))")
println("V | $(mean(V)) ($(std(V)))")
println("WAIC = T + V | $(mean(WAIC)) ($(std(WAIC)))")
println("LOOCV | $(mean(LOOCV)) ($(std(LOOCV)))")
println("GL | $(mean(GL)) ($(std(GL)))")
println("-"^70)
println("WAIC - GL | $(mean(WG)) ($(std(WG)))")
println("LOOCV - GL | $(mean(LG)) ($(std(LG)))")
println("LOOCV - WAIC | $(mean(LW)) ($(std(LW)))")
println("-"^70)
println("KL = GL - S | $(mean(KL)) ($(std(KL)))")
println("TT = T - T_true | $(mean(TT)) ($(std(TT)))")
println("WT = WAIC - T_true | $(mean(WT)) ($(std(WT)))")
println("KW = KL + WT | $(mean(KW)) ($(std(KW)))")
println("-"^70)
println("WBIC | $(mean(WBIC)) ($(std(WBIC)))")
println("FreeEnergy | $(mean(FreeEnergy)) ($(std(FreeEnergy)))")
println("-"^70)
println("WBIC - FreeEnergy | $(mean(WBIC-FreeEnergy)) ($(std(WBIC-FreeEnergy)))")
println("WBIC - T_true | $(mean(WBIC-T_true)) ($(std(WBIC-T_true)))")
println("FreeEnergy - T_true | $(mean(FreeEnergy-T_true)) ($(std(FreeEnergy-T_true)))")
println("="^70)
println("correlation of TT and KL = $(cor(TT, KL))")
println("correlation of WT and WT = $(cor(WT, KL))")
println("="^70)
println()
sleep(0.1)
plt.figure(figsize=(8,8))
plt.subplot(321)
x = linspace(minimum(WAIC), maximum(WAIC), 200)
plt.plt[:hist](WAIC, normed=true, bins=bins, alpha=0.5, label="WAIC")
plt.plot(x, pdf.(Chisq(n), x-(Shannon-n+mean(KW))), label="χ²($n) + const")
plt.grid(ls=":")
plt.legend(fontsize=8)
plt.title("WAIC of $modelname, n=$n, Nsims=$Nsims", fontsize=10)
plt.subplot(322)
x = linspace(minimum(T_true), maximum(T_true), 200)
plt.plt[:hist](T_true, normed=true, bins=bins, alpha=0.5, label="T_true")
plt.plot(x, pdf.(Chisq(n), x - (Shannon-n)), label="χ²($n) + const")
plt.grid(ls=":")
plt.legend(fontsize=8)
plt.title("T_true of the true dist, n=$n, Nsims=$Nsims", fontsize=10)
plt.subplot(323)
plt.plt[:hist](KL, normed=true, bins=bins, alpha=0.5, label="KL = GL \$-\$ S")
plt.grid(ls=":")
plt.legend()
plt.title("KL = GL \$-\$ Shannon for $modelname, n=$n", fontsize=10)
plt.subplot(324)
plt.plt[:hist](TT, normed=true, bins=bins, alpha=0.5, label="TT = T \$-\$ T_true")
plt.grid(ls=":")
plt.legend()
plt.title("TT = T \$-\$ T_true for $modelname, n=$n", fontsize=10)
plt.subplot(325)
plt.plt[:hist](WT, normed=true, bins=bins, alpha=0.5, label="WT = WAIC \$-\$ T_true")
plt.grid(ls=":")
plt.legend()
plt.title("WT = WAIC \$-\$ T_true for $modelname, n=$n", fontsize=10)
plt.subplot(326)
plt.plt[:hist](KW, normed=true, bins=bins, alpha=0.5, label="KW = KL + WT")
plt.axvline(mean(KW), color="k", ls=":", label="mean")
plt.grid(ls=":")
plt.legend(fontsize=8)
plt.title("KW = KL + WT for $modelname, n=$n", fontsize=10)
plt.tight_layout()
end
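The χ²($n$) + const reference curves drawn by plotTh12 come from an elementary identity for the true distribution $q = N(0,1)$ (a one-line derivation added here for clarity):
$$ \mathrm{T}_\mathrm{true} = -2\sum_{k=1}^n \log q(X_k) = n\log(2\pi) + \sum_{k=1}^n X_k^2 = (\mathrm{S} - n) + \left(\chi^2(n)\ \text{variate}\right), $$since $\sum_k X_k^2 \sim \chi^2(n)$ and $\mathrm{S} = n\log(2\pi) + n$. The WAIC histogram is compared with the same curve shifted further by mean(KW).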
function plotModelSelection(criterion::String, leftmodel::String, rightmodel::String;
title="Model Selection by $criterion (n=$n)",
xlabel="$leftmodel ←―――――――――――――――――→ $rightmodel")
IC_left = s(criterion, leftmodel)
IC_right = s(criterion, rightmodel)
Diff = IC_left - IC_right
Percent_left = 100*count(x -> x < 0.0, Diff)/length(Diff)
Percent_right = 100*count(x -> x ≥ 0.0, Diff)/length(Diff)
rmin = minDiff = minimum(Diff)
rmax = maxDiff = maximum(Diff)
(rmin > -0.3*rmax) && (rmin = -0.3*rmax)
(rmax < -0.3*rmin) && (rmax = -0.3*rmin)
h = plt.plt[:hist](Diff, normed=true, bins=50, alpha=0.5, range=(rmin,rmax), label="diff of $criterion")
plt.xlabel(xlabel)
plt.ylabel("probability density")
plt.axvline(0, color="red", ls="--")
xann1 = rmin
yann1 = 0.9*maximum(h[1])
plt.annotate("$Percent_left%",
xytext=(0.9*xann1, 0.6*yann1),
xy=(0.4*minDiff, 0.2*yann1),
arrowprops=Dict("arrowstyle" => "->", "color" => "red"),
color="red")
xann2 = 0.7*rmax
yann2 = 0.9*maximum(h[1])
plt.annotate("$Percent_right%",
xytext=(0.9*xann2, 0.6*yann2),
xy=(0.4*maxDiff, 0.2*yann2),
arrowprops=Dict("arrowstyle" => "->", "color" => "red"),
color="red")
plt.legend()
plt.grid(ls=":")
plt.title(title, fontsize=10)
end
function plotAllModelSelections(criterion::String)
plt.figure(figsize=(10,7))
plt.subplot(221); plotModelSelection(criterion, "normal1", "mixnormal")
plt.subplot(222); plotModelSelection(criterion, "normal", "mixnormal")
plt.subplot(223); plotModelSelection(criterion, "normal", "normal1")
plt.tight_layout()
end
plotAllModelSelections (generic function with 1 method)
data = load("DataStandardNormalSample8Case4649.jld2")
#data = load("DataStandardNormalSample32Case4649.jld2")
#data = load("DataStandardNormalSample128Case4649.jld2")
for v in sort(collect(keys(data)))
ex = parse("$v = data[\"$v\"]")
#println(ex)
eval(ex)
end
plotTh12("mixnormal")
plotTh12("normal1")
plotTh12("normal")
plotAllModelSelections("T")
plotAllModelSelections("GL")
plotAllModelSelections("WAIC")
plotAllModelSelections("LOOCV")
plotAllModelSelections("WBIC")
plotAllModelSelections("FreeEnergy");
========== mixnormal model (n = 8)
2λ (std) = 0.8151770751398939 (0.33712816732340767)
2ν (std) = 0.7750326586010107 (0.4383483117759712)
======================================================================
Name | mean (std)
----------------------------------------------------------------------
T | 22.02234590886735 (2.822472924267081)
V | 1.5500653172020213 (0.8766966235519424)
WAIC = T + V | 23.572411226069374 (3.668350621168819)
LOOCV | 23.585688515316896 (3.6729718494789463)
GL | 23.506632231233194 (0.7255248507398574)
----------------------------------------------------------------------
WAIC - GL | 0.0657789948361795 (3.640909010259932)
LOOCV - GL | 0.07905628408369625 (3.6454281249067693)
LOOCV - WAIC | 0.013277289247516703 (0.01885237468776022)
----------------------------------------------------------------------
KL = GL - S | 0.8036156999584336 (0.7255248507398576)
TT = T - T_true | -0.7233268668806677 (1.6194691398871135)
WT = WAIC - T_true | 0.8267384503213532 (1.369376555733368)
KW = KL + WT | 1.6303541502797878 (0.6742563346468153)
----------------------------------------------------------------------
WBIC | 23.571068280797494 (3.1898190957066386)
FreeEnergy | 23.78876430643267 (3.1953341871429077)
----------------------------------------------------------------------
WBIC - FreeEnergy | -0.21769602563517906 (0.2596683586612544)
WBIC - T_true | 0.8253955050494742 (1.376066424246314)
FreeEnergy - T_true | 1.0430915306846535 (1.4251497368413084)
======================================================================
correlation of TT and KL = -0.9089333129635853
correlation of WT and KL = -0.9798309449992468
======================================================================

========== normal1 model (n = 8)
2λ (std) = 0.7776267031306179 (0.3195502320619031)
2ν (std) = 0.8267803609497079 (0.42031062403709857)
======================================================================
Name | mean (std)
----------------------------------------------------------------------
T | 21.88137916433341 (3.3825987827659483)
V | 1.6535607218994157 (0.8406212480741971)
WAIC = T + V | 23.534939886232824 (4.217089768824615)
LOOCV | 23.554680817718843 (4.2282176144979315)
GL | 23.46900282705119 (0.99430894479763)
----------------------------------------------------------------------
WAIC - GL | 0.06593705918162938 (4.416306098938093)
LOOCV - GL | 0.08567799066764087 (4.427204731759379)
LOOCV - WAIC | 0.019740931486011422 (0.023857838836381543)
----------------------------------------------------------------------
KL = GL - S | 0.7659862957764332 (0.9943089447976299)
TT = T - T_true | -0.864293611414613 (1.3988277063144336)
WT = WAIC - T_true | 0.7892671104848024 (1.4983840655374459)
KW = KL + WT | 1.5552534062612358 (0.6391004641238062)
----------------------------------------------------------------------
WBIC | 23.401618713846357 (3.7680507991604855)
FreeEnergy | 23.666550841179728 (3.798490571952353)
----------------------------------------------------------------------
WBIC - FreeEnergy | -0.2649321273333608 (0.327178045892851)
WBIC - T_true | 0.6559459380983431 (1.3640438454994364)
FreeEnergy - T_true | 0.920878065431704 (1.4274684937467987)
======================================================================
correlation of TT and KL = -0.96304154909122
correlation of WT and KL = -0.9481971393426192
======================================================================

========== normal model (n = 8)
2λ (std) = 1.5657327671531884 (0.5581013851994706)
2ν (std) = 1.3115565532138558 (0.2693370584380582)
======================================================================
Name | mean (std)
----------------------------------------------------------------------
T | 21.540366618060787 (4.3735583379887695)
V | 2.6231131064277116 (0.5386741168761164)
WAIC = T + V | 24.163479724488496 (4.326841918649876)
LOOCV | 24.429838178928534 (4.355862366327318)
GL | 24.416675116840654 (1.7419998620411319)
----------------------------------------------------------------------
WAIC - GL | -0.253195392352159 (4.963997154879706)
LOOCV - GL | 0.013163062087868386 (4.988154549127784)
LOOCV - WAIC | 0.2663584544400274 (0.3564830594923183)
----------------------------------------------------------------------
KL = GL - S | 1.7136585855658975 (1.7419998620411317)
TT = T - T_true | -1.2053061576872328 (2.18177726534805)
WT = WAIC - T_true | 1.4178069487404794 (2.1587873808504288)
KW = KL + WT | 3.131465534306377 (1.1162027703989412)
----------------------------------------------------------------------
WBIC | 24.458762335286124 (3.760764511088155)
FreeEnergy | 24.699931383600443 (3.712036066121713)
----------------------------------------------------------------------
WBIC - FreeEnergy | -0.24116904831431873 (0.23736387096043504)
WBIC - T_true | 1.7130895595381033 (1.9242072305636941)
FreeEnergy - T_true | 1.9542586078524216 (1.942409961779507)
======================================================================
correlation of TT and KL = -0.8713370273786667
correlation of WT and KL = -0.8574436656523745
======================================================================
#data = load("DataStandardNormalSample8Case4649.jld2")
data = load("DataStandardNormalSample32Case4649.jld2")
#data = load("DataStandardNormalSample128Case4649.jld2")
for v in sort(collect(keys(data)))
ex = parse("$v = data[\"$v\"]")
#println(ex)
eval(ex)
end
plotTh12("mixnormal")
plotTh12("normal1")
plotTh12("normal")
plotAllModelSelections("T")
plotAllModelSelections("GL")
plotAllModelSelections("WAIC")
plotAllModelSelections("LOOCV")
plotAllModelSelections("WBIC")
plotAllModelSelections("FreeEnergy");
========== mixnormal model (n = 32)
2λ (std) = 1.155175814734873 (0.1813903105053722)
2ν (std) = 1.0742079398050077 (0.4299022217524213)
======================================================================
Name | mean (std)
----------------------------------------------------------------------
T | 89.71453458532856 (6.604903567637937)
V | 2.1484158796100155 (0.8598044435048426)
WAIC = T + V | 91.86295046493856 (7.371807010659133)
LOOCV | 91.86165485438019 (7.365566321980648)
GL | 91.97317169532286 (1.2051202161380583)
----------------------------------------------------------------------
WAIC - GL | -0.11022123038427012 (7.245313316555237)
LOOCV - GL | -0.11151684094266495 (7.239324044442848)
LOOCV - WAIC | -0.0012956105583948555 (0.02973944581736434)
----------------------------------------------------------------------
KL = GL - S | 1.161105570223802 (1.2051202161380585)
TT = T - T_true | -0.9991698203640712 (1.8341441540735908)
WT = WAIC - T_true | 1.1492460592459444 (1.5218424853234411)
KW = KL + WT | 2.310351629469746 (0.3627806210107444)
----------------------------------------------------------------------
WBIC | 93.12501637677528 (6.984221756458585)
FreeEnergy | 92.40576360391438 (6.962418987983377)
----------------------------------------------------------------------
WBIC - FreeEnergy | 0.7192527728609136 (0.4183977423682536)
WBIC - T_true | 2.411311971082669 (1.991077828522291)
FreeEnergy - T_true | 1.692059198221755 (1.7877247594393728)
======================================================================
correlation of TT and KL = -0.9136491693919635
correlation of WT and KL = -0.9914676309931532
======================================================================

========== normal1 model (n = 32)
2λ (std) = 0.9330719597667703 (0.15076764029777262)
2ν (std) = 0.9482629541999565 (0.23651080855026488)
======================================================================
Name | mean (std)
----------------------------------------------------------------------
T | 89.71321551952528 (7.368064527839469)
V | 1.896525908399913 (0.47302161710052976)
WAIC = T + V | 91.60974142792519 (7.829031589695603)
LOOCV | 91.61142810745376 (7.829452021917573)
GL | 91.78217302240004 (1.3399528600180424)
----------------------------------------------------------------------
WAIC - GL | -0.17243159447482673 (7.9773306007179405)
LOOCV - GL | -0.17074491494625815 (7.977763591516943)
LOOCV - WAIC | 0.0016866795285685186 (0.0033420913200539167)
----------------------------------------------------------------------
KL = GL - S | 0.9701068973009779 (1.3399528600180424)
TT = T - T_true | -1.0004888861673502 (1.4823572048977707)
WT = WAIC - T_true | 0.8960370222325631 (1.5004116791513726)
KW = KL + WT | 1.8661439195335405 (0.30153528059554524)
----------------------------------------------------------------------
WBIC | 92.57066964653924 (7.766540471455818)
FreeEnergy | 91.97466013585884 (7.666203176930305)
----------------------------------------------------------------------
WBIC - FreeEnergy | 0.5960095106804452 (0.5864672756101755)
WBIC - T_true | 1.8569652408466168 (2.0681075319960573)
FreeEnergy - T_true | 1.2609557301661718 (1.7162923302533113)
======================================================================
correlation of TT and KL = -0.9876854839085023
correlation of WT and KL = -0.9837908203615242
======================================================================

========== normal model (n = 32)
2λ (std) = 1.9557216808208768 (0.41838333719197185)
2ν (std) = 1.80091166751125 (0.33102869477470764)
======================================================================
Name | mean (std)
----------------------------------------------------------------------
T | 89.06190569821592 (8.108806933610438)
V | 3.6018233350225 (0.6620573895494153)
WAIC = T + V | 92.66372903323841 (8.116944745587524)
LOOCV | 92.71657877982683 (8.122926059015976)
GL | 92.77348485919502 (1.9653333091267573)
----------------------------------------------------------------------
WAIC - GL | -0.10975582595658047 (8.558186765597165)
LOOCV - GL | -0.05690607936820591 (8.564761570196836)
LOOCV - WAIC | 0.05284974658837476 (0.08431686367806858)
----------------------------------------------------------------------
KL = GL - S | 1.961418734095962 (1.9653333091267577)
TT = T - T_true | -1.6517987074767075 (2.083601038695401)
WT = WAIC - T_true | 1.950024627545792 (2.1290880841650597)
KW = KL + WT | 3.9114433616417537 (0.8367666743839437)
----------------------------------------------------------------------
WBIC | 94.91599743621809 (8.245736767533247)
FreeEnergy | 93.50820328199669 (8.10096824594339)
----------------------------------------------------------------------
WBIC - FreeEnergy | 1.4077941542214105 (1.0038081392697724)
WBIC - T_true | 4.202293030525466 (3.052600632531801)
FreeEnergy - T_true | 2.7944988763040546 (2.4243462877296116)
======================================================================
correlation of TT and KL = -0.9351331311893791
correlation of WT and KL = -0.9195382809923446
======================================================================
#data = load("DataStandardNormalSample8Case4649.jld2")
#data = load("DataStandardNormalSample32Case4649.jld2")
data = load("DataStandardNormalSample128Case4649.jld2")
for v in sort(collect(keys(data)))
ex = parse("$v = data[\"$v\"]")
#println(ex)
eval(ex)
end
plotTh12("mixnormal")
plotTh12("normal1")
plotTh12("normal")
plotAllModelSelections("T")
plotAllModelSelections("GL")
plotAllModelSelections("WAIC")
plotAllModelSelections("LOOCV")
plotAllModelSelections("WBIC")
plotAllModelSelections("FreeEnergy");
========== mixnormal model (n = 128)
2λ (std) = 1.3132198826488952 (0.11051169524376306)
2ν (std) = 1.2434679742589019 (0.3893114662850205)
======================================================================
Name | mean (std)
----------------------------------------------------------------------
T | 362.4013518528037 (14.810658604163986)
V | 2.4869359485178038 (0.778622932570041)
WAIC = T + V | 364.8882878013216 (15.517291034838108)
LOOCV | 364.89090087933636 (15.516582394095808)
GL | 364.57523818609155 (1.3700130033861326)
----------------------------------------------------------------------
WAIC - GL | 0.31304961523019587 (15.217151166016553)
LOOCV - GL | 0.3156626932450165 (15.216473397507801)
LOOCV - WAIC | 0.002613078014820303 (0.016397337807532348)
----------------------------------------------------------------------
KL = GL - S | 1.326973685695199 (1.3700130033861324)
TT = T - T_true | -1.1874698689152123 (1.8987780812446018)
WT = WAIC - T_true | 1.2994660796025912 (1.5205962827214443)
KW = KL + WT | 2.6264397652977904 (0.22102339048752612)
----------------------------------------------------------------------
WBIC | 367.24496873183443 (15.095970552245388)
FreeEnergy | 365.5825765954295 (15.096921251818156)
----------------------------------------------------------------------
WBIC - FreeEnergy | 1.6623921364047805 (1.1796949669415928)
WBIC - T_true | 3.6561470101153746 (2.7463666443529444)
FreeEnergy - T_true | 1.9937548737105935 (1.9387579307210285)
======================================================================
correlation of TT and KL = -0.9255381674740645
correlation of WT and KL = -0.9937174649007365
======================================================================

========== normal1 model (n = 128)
2λ (std) = 0.9872297188536074 (0.09104830972127553)
2ν (std) = 0.9901284690677659 (0.13409535988613622)
======================================================================
Name | mean (std)
----------------------------------------------------------------------
T | 362.5914240857164 (15.829997087659219)
V | 1.9802569381355317 (0.26819071977227243)
WAIC = T + V | 364.57168102385185 (16.068948210559768)
LOOCV | 364.5717505913329 (16.06894672700084)
GL | 364.2398646359707 (1.363604295771297)
----------------------------------------------------------------------
WAIC - GL | 0.3318163878813046 (16.18741792385838)
LOOCV - GL | 0.3318859553623783 (16.18741488768318)
LOOCV - WAIC | 6.956748107404565e-5 (0.0007035589518467509)
----------------------------------------------------------------------
KL = GL - S | 0.9916001355743571 (1.3636042957712966)
TT = T - T_true | -0.9973976360026738 (1.390112033637219)
WT = WAIC - T_true | 0.9828593021328578 (1.4012246710907141)
KW = KL + WT | 1.9744594377072149 (0.18209661944255107)
----------------------------------------------------------------------
WBIC | 366.141186165668 (16.103590815617803)
FreeEnergy | 364.95603733863896 (15.999726040888252)
----------------------------------------------------------------------
WBIC - FreeEnergy | 1.1851488270292296 (0.8469179151774902)
WBIC - T_true | 2.55236444394917 (2.084788968791164)
FreeEnergy - T_true | 1.3672156169199396 (1.558645134078157)
======================================================================
correlation of TT and KL = -0.995186784434587
correlation of WT and KL = -0.9916931960344842
======================================================================

========== normal model (n = 128)
2λ (std) = 1.9912157379734927 (0.2261384408382408)
2ν (std) = 1.934185419939284 (0.20383237518918526)
======================================================================
Name | mean (std)
----------------------------------------------------------------------
T | 361.70670901233996 (16.070556650363827)
V | 3.868370839878568 (0.4076647503783705)
WAIC = T + V | 365.57507985221827 (16.073936379895116)
LOOCV | 365.5789309425814 (16.07394617064168)
GL | 365.24443784584395 (1.9146146999184985)
----------------------------------------------------------------------
WAIC - GL | 0.3306420063744822 (16.169756406600527)
LOOCV - GL | 0.33449309673736194 (16.16972509139636)
LOOCV - WAIC | 0.0038510903628798587 (0.00753071050782785)
----------------------------------------------------------------------
KL = GL - S | 1.9961733454476547 (1.9146146999184979)
TT = T - T_true | -1.8821127093792347 (1.9725632751261004)
WT = WAIC - T_true | 1.986258130499333 (1.981920621854508)
KW = KL + WT | 3.9824314759469854 (0.4522768816764816)
----------------------------------------------------------------------
WBIC | 368.6885413201743 (16.30779423340957)
FreeEnergy | 366.30698047655505 (16.125020854918912)
----------------------------------------------------------------------
WBIC - FreeEnergy | 2.3815608436194133 (1.4037875138591407)
WBIC - T_true | 5.099719598455353 (3.260819837669346)
FreeEnergy - T_true | 2.7181587548359407 (2.3081256996668325)
======================================================================
correlation of TT and KL = -0.9793187354190306
correlation of WT and KL = -0.9736436602286695
======================================================================