versioninfo() # http://yomichi.hateblo.jp/entry/2015/09/27/151257 # https://gist.github.com/yomichi/def5921f6b81eb5f7b44 #= Yuichi Motoyama 2015 This is distributed under the Boost Software License Version 1.0 http://www.boost.org/LICENSE_1_0.txt =# @enum DepwarnFlag DepwarnOff=0 DepwarnOn=1 DepwarnError=2 doc""" - `switch_depwarn!(flag :: Bool)` - `switch_depwarn!(flag :: DepwarnFlag)` Enable/Disable deprecation warning. - `DepwarnOff` or `false` : switch off deprecation warning - `DepwarnOn` or `true` : switch on deprecation warning - `DepwarnError` : turn deprecation warning into error """ switch_depwarn!(flag :: Bool) = switch_depwarn!(flag ? DepwarnOn : DepwarnOff) function switch_depwarn!(flag :: DepwarnFlag) old_opt = Base.JLOptions() params = map(fieldnames(Base.JLOptions)) do name name == :depwarn ? Int(flag) : getfield(old_opt, name) end new_opt = Base.JLOptions(params...) unsafe_store!(cglobal(:jl_options, Base.JLOptions), new_opt) flag end # one-liner # unsafe_store!(cglobal(:jl_options, Base.JLOptions), Base.JLOptions(map(fieldnames(Base.JLOptions)) do name; name==:depwarn?0:getfield(Base.JLOptions(), name) end...)) switch_depwarn!(false) using PyPlot using Distributions # xlog(x,y) = if x == 0 then 0 else x*log(x/y) # # Warning: # # Don't use x*log(x/y) instead of xlog(x,y). # Because log(0) = -Inf and 0*Inf = NaN. # function xlog(x::Float64, y::Float64) if x == zero(x) return zero(x) elseif y == zero(y) return Inf else return x*log(x/y) end end xlog(x,y) = xlog(Float64(x),Float64(y)) x, y = 0, 1 @show x, y @show x*log(x/y) @show xlog(x,y); prodprob(p::Float64, q::Float64) = [p*q, p*(1-q), (1-p)*q, (1-p)*(1-q)] function expected(a::AbstractArray{T,1}) where T n = sum(a) return [(a[1]+a[2])*(a[1]+a[3])/n, (a[1]+a[2])*(a[2]+a[4])/n, (a[3]+a[4])*(a[1]+a[3])/n, (a[3]+a[4])*(a[2]+a[4])/n] end function chisqtest(a::AbstractArray{Int64,1}) mu = expected(a) chisq = sum((a .- mu).^2 ./mu) pval = ccdf(Chisq(1), chisq) return pval end function gtest(a::AbstractArray{Int64,1}) mu = expected(a) g = 2*sum(xlog.(a,mu)) # Don't use a.*log(a./mu) pval = ccdf(Chisq(1), g) return pval end function pvaluehg(d::Hypergeometric, k::Int64) c = params(d) amax = min(c[1],c[3]) p0 = pdf(d, k) p1 = 0.0 pval = 0.0 for j in 0:amax p1 = pdf(d, j) pval += ifelse(p1 ≤ p0, p1, 0.0) end return min(pval, 1.0) end function fishertest(a::AbstractArray{Int64,1}) d = Hypergeometric(a[1]+a[2], a[3]+a[4], a[1]+a[3]) return pvaluehg(d, a[1]) end # a が補正パラメーター # a を小さくするほどP値はより小さくなる. # a = 1.0 で通常のFisher検定の場合に戻る. # a = 0.5 で mid-p 版になる. # function pvaluehg_corr(d::Hypergeometric, k::Int64; a=0.5) c = params(d) amax = min(c[1],c[3]) p0 = pdf(d, k) p1 = 0.0 pval = 0.0 for j in 0:amax p1 = pdf(d, j) pval += ifelse(p1 == p0, a*p1, ifelse(p1 < p0, p1, 0.0)) end return min(pval, 1.0) end function fishertest_corr(a::AbstractArray{Int64,1}) d = Hypergeometric(a[1]+a[2], a[3]+a[4], a[1]+a[3]) return pvaluehg_corr(d, a[1]) end ecdf(pval::AbstractArray{Float64,1}, x::Float64) = count(p -> p ≤ x, pval)/length(pval) ecdf(pval, x) = ecdf(pval, Float64(x)) function randPoisson(n::Int64, p::AbstractArray{Float64}, N::Int64) return [ rand(Poisson(n*p[1]),N)'; rand(Poisson(n*p[2]),N)'; rand(Poisson(n*p[3]),N)'; rand(Poisson(n*p[4]),N)'; ] end function randMultinomial(n::Int64, p::AbstractArray{Float64}, N::Int64) return rand(Multinomial(n,p), N) end function randBinomial(n::Int64, p::AbstractArray{Float64}, N::Int64) m = Int64(round(n*(p[1]+p[2]))) q1 = p[1]/(p[1]+p[2]) q2 = p[3]/(p[3]+p[4]) return [rand(Multinomial(m, [q1, 1.0-q1]), N); rand(Multinomial(n-m, [q2, 1.0-q2]), N)] end function randHypergeometric(n::Int64, p::AbstractArray{Float64}, N::Int64) m1 = Int64(round(n*(p[1]+p[2]))) m2 = Int64(round(n*(p[3]+p[4]))) n1 = Int64(round(n*(p[1]+p[3]))) a = rand(Hypergeometric(m1, m2, n1), N)' b = m1 .- a c = n1 .- a d = m2 .- c return [a; b; c; d] end function pvaluesby(sampler::T, n::Int64; N = 10^4, alpha = 0.05) where T<:Function px = collect(0.05:0.05:0.50) py = px np = length(px) prob_chisq = Array{Float64,2}(np,np) prob_g = Array{Float64,2}(np,np) prob_fisher = Array{Float64,2}(np,np) prob_fisher_c = Array{Float64,2}(np,np) a = Array{Float64,2}(4,N) for i in 1:np for j in 1:np if i > j && sampler != randBinomial prob_chisq[i,j] = prob_chisq[j,i] prob_g[i,j] = prob_g[j,i] prob_fisher[i,j] = prob_fisher[j,i] prob_fisher_c[i,j] = prob_fisher_c[j,i] else a = sampler(n, prodprob(px[i],py[j]), N) prob_chisq[i,j] = ecdf([chisqtest(a[:,i]) for i in 1:size(a,2)], alpha) prob_g[i,j] = ecdf([gtest(a[:,i]) for i in 1:size(a,2)], alpha) prob_fisher[i,j] = ecdf([fishertest(a[:,i]) for i in 1:size(a,2)], alpha) prob_fisher_c[i,j] = ecdf([fishertest_corr(a[:,i]) for i in 1:size(a,2)], alpha) end end end return alpha, px, prob_chisq, prob_g, prob_fisher, prob_fisher_c end function plotcomparisontest(sampler, n; N = 10^4, alpha = 0.05) alpha, px, prob_chisq, prob_g, prob_fisher, prob_fisher_c = pvaluesby(sampler, n; N = N, alpha = alpha) py = px np = length(px) ps = [0.0;px] vmin = 0.0 vmax = 2*alpha cmap = "RdBu_r" figure(figsize=(8,6.4)) ax1 = subplot2grid((16,20), (0,0), rowspan=7, colspan=8) pcolormesh(ps, ps, prob_chisq, cmap=cmap, vmin=vmin, vmax=vmax) colorbar(label="probability of p-value \$\\leqq\$ $alpha") title("\$\\chi^2\$-test") xlabel("marginal probability", fontsize=8) ylabel("marginal probability", fontsize=8) ax2 = subplot2grid((16,20), (0,12), rowspan=7, colspan=8) pcolormesh(ps, ps, prob_g, cmap=cmap, vmin=vmin, vmax=vmax) colorbar(label="probability of p-value \$\\leqq\$ $alpha") title("G-test") xlabel("marginal probability", fontsize=8) ylabel("marginal probability", fontsize=8) ax3 = subplot2grid((16,20), (9,0), rowspan=7, colspan=8) pcolormesh(ps, ps, prob_fisher, cmap=cmap, vmin=vmin, vmax=vmax) colorbar(label="probability of p-value \$\\leqq\$ $alpha") title("Fisher's exact test") xlabel("marginal probability", fontsize=8) ylabel("marginal probability", fontsize=8) ax3 = subplot2grid((16,20), (9,12), rowspan=7, colspan=8) pcolormesh(ps, ps, prob_fisher_c, cmap=cmap, vmin=vmin, vmax=vmax) colorbar(label="probability of p-value \$\\leqq\$ $alpha") title("mid-p Fisher's exact test") xlabel("marginal probability", fontsize=8) ylabel("marginal probability", fontsize=8) suptitle("sampler = $(typeof(sampler)), n = $n, \$\\alpha\$ = $alpha") end @time plotcomparisontest(randPoisson, 25, N=10000, alpha=0.05) @time plotcomparisontest(randPoisson, 50, N=10000, alpha=0.05) @time plotcomparisontest(randPoisson, 100, N=10000, alpha=0.05) @time plotcomparisontest(randPoisson, 200, N=10000, alpha=0.05) @time plotcomparisontest(randMultinomial, 25, N=10000, alpha=0.05) @time plotcomparisontest(randMultinomial, 50, N=10000, alpha=0.05) @time plotcomparisontest(randMultinomial, 100, N=10000, alpha=0.05) @time plotcomparisontest(randMultinomial, 200, N=10000, alpha=0.05) @time plotcomparisontest(randBinomial, 25, N=10000, alpha=0.05) @time plotcomparisontest(randBinomial, 50, N=10000, alpha=0.05) @time plotcomparisontest(randBinomial, 100, N=10000, alpha=0.05) @time plotcomparisontest(randBinomial, 200, N=10000, alpha=0.05) @time plotcomparisontest(randHypergeometric, 25, N=10000, alpha=0.05) @time plotcomparisontest(randHypergeometric, 50, N=10000, alpha=0.05) @time plotcomparisontest(randHypergeometric, 100, N=10000, alpha=0.05) @time plotcomparisontest(randHypergeometric, 200, N=10000, alpha=0.05) function randprob(n, p, q) P = prodprob(p,q) R = similar(P) for i in 1:10000 R = P .* (1.0 .+ 0.5*rand(4)) R = R/sum(R) if 0.4 < chisqtest(Int64.(round.(n*R))) < 0.6 break end end return R end function randpvaluesby(sampler::T, n::Int64; N = 10^4, alpha = 0.05) where T<:Function px = collect(0.05:0.05:0.50) py = px np = length(px) prob_chisq = Array{Float64,2}(np,np) prob_g = Array{Float64,2}(np,np) prob_fisher = Array{Float64,2}(np,np) prob_fisher_c = Array{Float64,2}(np,np) a = Array{Float64,2}(4,N) for i in 1:np for j in 1:np a = sampler(n, randprob(n, px[i], py[j]), N) prob_chisq[i,j] = ecdf([chisqtest(a[:,i]) for i in 1:size(a,2)], alpha) prob_g[i,j] = ecdf([gtest(a[:,i]) for i in 1:size(a,2)], alpha) prob_fisher[i,j] = ecdf([fishertest(a[:,i]) for i in 1:size(a,2)], alpha) prob_fisher_c[i,j] = ecdf([fishertest(a[:,i]) for i in 1:size(a,2)], alpha) end end return alpha, px, prob_chisq, prob_g, prob_fisher, prob_fisher_c end function plotrandcomparisontest(sampler, n; N = 10^4, alpha = 0.05) alpha, px, prob_chisq, prob_g, prob_fisher, prob_fisher_c = randpvaluesby(sampler, n; N = N, alpha = alpha) py = px np = length(px) ps = [0.0;px] vmin = 0.0 vmax = 2*alpha cmap = "RdBu_r" figure(figsize=(8,6.4)) ax1 = subplot2grid((16,20), (0,0), rowspan=7, colspan=8) pcolormesh(ps, ps, prob_chisq, cmap=cmap, vmin=vmin, vmax=vmax) colorbar(label="probability of p-value \$\\leqq\$ $alpha") title("\$\\chi^2\$-test") xlabel("marginal probability", fontsize=8) ylabel("marginal probability", fontsize=8) ax2 = subplot2grid((16,20), (0,12), rowspan=7, colspan=8) pcolormesh(ps, ps, prob_g, cmap=cmap, vmin=vmin, vmax=vmax) colorbar(label="probability of p-value \$\\leqq\$ $alpha") title("G-test") xlabel("marginal probability", fontsize=8) ylabel("marginal probability", fontsize=8) ax3 = subplot2grid((16,20), (9,0), rowspan=7, colspan=8) pcolormesh(ps, ps, prob_fisher, cmap=cmap, vmin=vmin, vmax=vmax) colorbar(label="probability of p-value \$\\leqq\$ $alpha") title("Fisher's exact test") xlabel("marginal probability", fontsize=8) ylabel("marginal probability", fontsize=8) ax4 = subplot2grid((16,20), (9,12), rowspan=7, colspan=8) pcolormesh(ps, ps, prob_fisher_c, cmap=cmap, vmin=vmin, vmax=vmax) colorbar(label="probability of p-value \$\\leqq\$ $alpha") title("mid-p Fisher's exact test") xlabel("marginal probability", fontsize=8) ylabel("marginal probability", fontsize=8) suptitle("Dependent case: sampler = $(typeof(sampler)), n = $n, \$\\alpha\$ = $alpha") end n = 100 @show p = prodprob(0.2,0.3) @show r = randprob(n,0.2,0.3) chisqtest(Int.(round.(n*r))) n = 100 @time reshape([chisqtest(Int64.(round.(n*randprob(n,p,q)))) for p in 0.05:0.05:0.5 for q in 0.05:0.05:0.5],10,10) @time plotcomparisontest(randMultinomial, 25, N=10000, alpha=0.05) @time plotrandcomparisontest(randMultinomial, 25, N=10000, alpha=0.05) @time plotcomparisontest(randMultinomial, 50, N=10000, alpha=0.05) @time plotrandcomparisontest(randMultinomial, 50, N=10000, alpha=0.05) @time plotcomparisontest(randMultinomial, 100, N=10000, alpha=0.05) @time plotrandcomparisontest(randMultinomial, 100, N=10000, alpha=0.05) @time plotcomparisontest(randMultinomial, 200, N=10000, alpha=0.05) @time plotrandcomparisontest(randMultinomial, 200, N=10000, alpha=0.05) @time plotcomparisontest(randBinomial, 25, N=10000, alpha=0.05) @time plotrandcomparisontest(randBinomial, 25, N=10000, alpha=0.05) @time plotcomparisontest(randBinomial, 50, N=10000, alpha=0.05) @time plotrandcomparisontest(randBinomial, 50, N=10000, alpha=0.05) @time plotcomparisontest(randBinomial, 100, N=10000, alpha=0.05) @time plotrandcomparisontest(randBinomial, 100, N=10000, alpha=0.05) @time plotcomparisontest(randBinomial, 200, N=10000, alpha=0.05) @time plotrandcomparisontest(randBinomial, 200, N=10000, alpha=0.05)