# Load packages and define functions that no longer exist in Julia v1.0
using Printf
linspace(start, stop, length) = range(start, stop=stop, length=length)

# used for handling probability distributions
using Distributions
# used to find the scale parameter that minimizes the sup norm
using Optim
# used to compute the Kullback-Leibler information by numerical integration
using QuadGK
# used to color the scatter plots
using KernelDensity
# used for plotting
using PyPlot

# `?Logistic` and `?Normal` show the docstrings (works in the REPL / IJulia).
?Logistic

@show dist_logistic = Logistic(0.0, 1.0);

?Normal

@show sigma_logistic = std(dist_logistic)
@show dist_best_normal_approx = Normal(0.0, sigma_logistic)

sleep(0.1)
x = linspace(-8, 8, 401)
figure()
plot(x, pdf.(dist_logistic, x), label="logistic", color="k", lw=1)
plot(x, pdf.(dist_best_normal_approx, x), label="best normal approx.", color="r", ls="--")
grid()
legend();

# sup-norm distance between two functions on [a, b], evaluated on a grid of L points
function supnorm(f, g; a=-10.0, b=10.0, L=1001)
    x = linspace(a, b, L)
    return maximum(abs.(f.(x) .- g.(x)))
end

pdf_logistic = (x -> pdf(dist_logistic, x))
pdf_normal(sigma) = (x -> pdf(Normal(0.0, sigma), x))
f(sigma) = supnorm(pdf_logistic, pdf_normal(sigma))
o = optimize(f, 0.1, 10)
show(o)

@show sigma_supnorm_normal_approx = o.minimizer
@show dist_supnorm_normal_approx = Normal(0.0, sigma_supnorm_normal_approx)

sleep(0.1)
x = linspace(-8, 8, 401)
figure()
plot(x, pdf.(dist_logistic, x), label="logistic", color="k", lw=1)
plot(x, pdf.(dist_supnorm_normal_approx, x), label="supnorm normal approx.", color="b", ls="-.")
grid()
legend();

# Kullback-Leibler information D(q||p) = ∫ q(x) log(q(x)/p(x)) dx on [a, b];
# also returns the integrand so that it can be plotted.
function KullbackLeibler(dist_q, dist_p; a=-10.0, b=10.0)
    q(x) = pdf(dist_q, x)
    logq(x) = logpdf(dist_q, x)
    p(x) = pdf(dist_p, x)
    logp(x) = logpdf(dist_p, x)
    f(x) = (q(x) == zero(q(x)) ? zero(q(x)) : q(x)*(logq(x) - logp(x)))
    return quadgk(f, a, b)[1], f
end

KL_b, f_b = KullbackLeibler(dist_logistic, dist_best_normal_approx)
KL_s, f_s = KullbackLeibler(dist_logistic, dist_supnorm_normal_approx)
println("Kullback-Leibler information:")
println("* D(dist_logistic||dist_best_normal_approx) = $KL_b")
println("* D(dist_logistic||dist_supnorm_normal_approx) = $KL_s")

sleep(0.1)
x = linspace(-10, 10, 401)
figure()
axhline(0.0, color="k", lw=0.8)
plot(x, f_b.(x), label="best normal approx.", color="r", ls="--")
plot(x, f_s.(x), label="supnorm normal approx.", color="b", ls="-.")
legend()
grid()
title("integrands of KL information")
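
# A minimal sanity check, added for illustration (not part of the original script):
# for a fixed mean 0, the sigma minimizing D(logistic || Normal(0, sigma)) satisfies
# sigma^2 = second moment of the logistic distribution, so the numerical minimizer
# should approximately agree with std(Logistic(0,1)) = π/√3 ≈ 1.8138, i.e. with
# sigma_logistic above.  The helper name `g_KL` is introduced only for this check.
g_KL(sigma) = KullbackLeibler(dist_logistic, Normal(0.0, sigma))[1]
o_KL = optimize(g_KL, 0.1, 10)
@show o_KL.minimizer
@show π/√3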
# AIC of the normal model Normal(0, sigma), with sigma fitted by maximum likelihood
# (one free parameter, hence the +2 penalty)
function AIC_normal(sample)
    sigma = std(sample, corrected=false, mean=0.0)
    logpred(x) = logpdf(Normal(0.0, sigma), x)
    return -2*sum(logpred.(sample)) + 2
end

# AIC of the fixed logistic model (no free parameters, hence no penalty term)
AIC_logistic(sample) = -2*sum(logpdf.(dist_logistic, sample))

######### Tests

n = 2^4

sample_best_normal_approx = rand(dist_best_normal_approx, n)
@show AIC_normal(sample_best_normal_approx)
@show AIC_logistic(sample_best_normal_approx)

sample_supnorm_normal_approx = rand(dist_supnorm_normal_approx, n)
@show AIC_normal(sample_supnorm_normal_approx)
@show AIC_logistic(sample_supnorm_normal_approx)

sample_logistic = rand(dist_logistic, n)
@show AIC_normal(sample_logistic)
@show AIC_logistic(sample_logistic)
;

# Draw L samples of size n from dist, record both AICs, and return the fraction
# of iterations in which the logistic model has the smaller AIC.
function simulation_by(dist; n = 2^8, L = 10^4)
    AIC_n = Array{Float64}(undef, L)
    AIC_l = Array{Float64}(undef, L)
    for i in 1:L
        sample = rand(dist, n)
        AIC_n[i] = AIC_normal(sample)
        AIC_l[i] = AIC_logistic(sample)
    end
    ratio_logistic = count(AIC_l .< AIC_n)/L
    return ratio_logistic, AIC_n, AIC_l
end

function plotdiffs(n, L, AIC_n_b, AIC_l_b, AIC_n_s, AIC_l_s)
    figure(figsize=(10,4))

    subplot2grid((9,20), (1, 0), rowspan=8, colspan=10)
    title("Samples are generated by best normal approx.", fontsize=11)
    xlabel("AIC_normal \$-\$ AIC_logistic")
    ylabel("empirical probability")
    h = plt[:hist](AIC_n_b - AIC_l_b, normed=true, bins=100)
    axvline(x=0.0, color="k", ls="--", lw=1)
    r_str = @sprintf("%.2f", 100*count(AIC_n_b .> AIC_l_b)/L)
    text(1.0, 0.7*maximum(h[1]), "→ $r_str%", color="red")
    grid(ls=":")
    #legend()

    subplot2grid((9,20), (1, 10), rowspan=8, colspan=10)
    title("Samples are generated by supnorm normal approx.", fontsize=11)
    xlabel("AIC_normal \$-\$ AIC_logistic")
    ylabel("empirical probability")
    h = plt[:hist](AIC_n_s - AIC_l_s, normed=true, bins=100)
    axvline(x=0.0, color="k", ls="--", lw=1)
    r_str = @sprintf("%.2f", 100*count(AIC_n_s .> AIC_l_s)/L)
    text(1.0, 0.7*maximum(h[1]), "→ $r_str%", color="red")
    grid(ls=":")
    #legend()

    suptitle("===== Sample size n = $n, Iterations L = $L =====")
    tight_layout()
end

function plotscatters(n, L, AIC_n_b, AIC_l_b, AIC_n_s, AIC_l_s)
    ik_b = InterpKDE(kde((AIC_n_b, AIC_l_b)))
    ik_s = InterpKDE(kde((AIC_n_s, AIC_l_s)))

    figure(figsize=(10,5))

    subplot2grid((11,20), (1, 0), rowspan=10, colspan=10, facecolor="darkgray")
    title("Samples are generated by best normal approx.", fontsize=11)
    xlabel("AIC of the normal model")
    ylabel("AIC of the logistic model")
    # scatter(AIC_n_b, AIC_l_b, c=pdf.(ik_b, AIC_n_b, AIC_l_b), cmap="CMRmap", s=2)
    scatter(AIC_n_b, AIC_l_b, c=[pdf(ik_b, AIC_n_b[i], AIC_l_b[i]) for i in eachindex(AIC_n_b)], cmap="CMRmap", s=2)
    xmin = minimum(AIC_n_b)
    xmax = maximum(AIC_n_b)
    plot([xmin, xmax], [xmin, xmax], color="cyan", ls="--", label="x = y")
    r_str = @sprintf("%.2f", 100*count(AIC_n_b .> AIC_l_b)/L)
    text(0.5*(xmin+xmax)+0.1*(xmax-xmin), 0.5*(xmin+xmax)-0.1*(xmax-xmin), "$r_str%", color="red")
    grid(ls=":", color="w")
    legend()

    subplot2grid((11,20), (1, 10), rowspan=10, colspan=10, facecolor="darkgray")
    title("Samples are generated by supnorm normal approx.", fontsize=11)
    xlabel("AIC of the normal model")
    ylabel("AIC of the logistic model")
    # scatter(AIC_n_s, AIC_l_s, c=pdf.(ik_s, AIC_n_s, AIC_l_s), cmap="CMRmap", s=2)
    scatter(AIC_n_s, AIC_l_s, c=[pdf(ik_s, AIC_n_s[i], AIC_l_s[i]) for i in eachindex(AIC_n_s)], cmap="CMRmap", s=2)
    xmin = minimum(AIC_n_s)
    xmax = maximum(AIC_n_s)
    plot([xmin, xmax], [xmin, xmax], color="cyan", ls="--", label="x = y")
    r_str = @sprintf("%.2f", 100*count(AIC_n_s .> AIC_l_s)/L)
    text(0.5*(xmin+xmax)+0.1*(xmax-xmin), 0.5*(xmin+xmax)-0.1*(xmax-xmin), "$r_str%", color="red")
    grid(ls=":", color="w")
    legend()

    suptitle("===== Sample size n = $n, Iterations L = $L =====")
    tight_layout()
end

n = 2^7
L = 10^4
@time ratio_logistic_b, AIC_n_b, AIC_l_b = simulation_by(dist_best_normal_approx, n=n, L=L)
@time ratio_logistic_s, AIC_n_s, AIC_l_s = simulation_by(dist_supnorm_normal_approx, n=n, L=L)
println()
@show ratio_logistic_b
@show ratio_logistic_s

sleep(0.1)
plotdiffs(n, L, AIC_n_b, AIC_l_b, AIC_n_s, AIC_l_s)
plotscatters(n, L, AIC_n_b, AIC_l_b, AIC_n_s, AIC_l_s)

n = 2^8
L = 10^4
@time ratio_logistic_b, AIC_n_b, AIC_l_b = simulation_by(dist_best_normal_approx, n=n, L=L)
@time ratio_logistic_s, AIC_n_s, AIC_l_s = simulation_by(dist_supnorm_normal_approx, n=n, L=L)
println()
@show ratio_logistic_b
@show ratio_logistic_s

sleep(0.1)
plotdiffs(n, L, AIC_n_b, AIC_l_b, AIC_n_s, AIC_l_s)
plotscatters(n, L, AIC_n_b, AIC_l_b, AIC_n_s, AIC_l_s)
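
# Rough heuristic, added for illustration (not part of the original script).
# When a sample of size n is drawn from a normal distribution Q, the fitted
# normal model nearly recovers Q, so on average
#     AIC_normal - AIC_logistic ≈ 2 - 2n*D(Q||logistic),
# and the normal model starts to win once n exceeds roughly 1/D(Q||logistic).
D_b, = KullbackLeibler(dist_best_normal_approx, dist_logistic)
D_s, = KullbackLeibler(dist_supnorm_normal_approx, dist_logistic)
@show 1/D_b   # approximate crossover sample size for the best normal approx.
@show 1/D_s   # approximate crossover sample size for the supnorm normal approx.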
@time let
    ks = 2:12
    L = 10^4
    r_b = Array{Float64}(undef, length(ks))
    r_s = Array{Float64}(undef, length(ks))
    i = 0
    for k in ks
        n = 2^k
        i += 1
        r_b[i], = simulation_by(dist_best_normal_approx, n=n, L=L)
        r_s[i], = simulation_by(dist_supnorm_normal_approx, n=n, L=L)
    end

    sleep(0.1)
    figure()
    plot(ks, r_b, label="best normal approx.", color="r", ls="--")
    plot(ks, r_s, label="supnorm normal approx.", color="b", ls="-.")
    grid()
    xlabel("\$\\log_2\$(sample size)")
    ylabel("empirical probability ($L iterations)")
    legend()
    title("probability of the logistic model selected")
end
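
# Optional variant, added as a quick sketch (not in the original script): the same
# sweep with samples drawn from the logistic distribution itself.  Here the logistic
# model is the true model, so the empirical selection probability should approach 1
# as the sample size grows.
let
    ks = 2:12
    L = 10^3   # fewer iterations, just a quick check
    r_l = [first(simulation_by(dist_logistic, n=2^k, L=L)) for k in ks]
    @show r_l
end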