Notebook

In [1]:

using Random
Random.seed!(4649373)

Out[1]:

TaskLocalRNG()

In [2]:

using Distributions
using QuadGK
using Roots

function winningrate(
        distx::ContinuousUnivariateDistribution,
        disty::ContinuousUnivariateDistribution
    )
    quadgk(y -> cdf(distx, y)*pdf(disty, y), extrema(disty)...)[1]
end

function tieshift(
        distx::ContinuousUnivariateDistribution,
        disty::ContinuousUnivariateDistribution;
        p = 1/2
    )
    find_zero(0.0) do a
        winningrate(distx + a, disty) - p
    end
end

@show distx = Exponential(1)
@show disty = Exponential(1.1)
@show distx = distx - mean(distx)
@show disty = disty - mean(disty)
@show winningrate(distx, disty)
@show a = tieshift(distx, disty)
@show winningrate(distx + a, disty);9

distx = Exponential(1) = Exponential{Float64}(θ=1.0)
disty = Exponential(1.1) = Exponential{Float64}(θ=1.1)
distx = distx - mean(distx) = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: -1.0
σ: 1.0
ρ: Exponential{Float64}(θ=1.0)
)

disty = disty - mean(disty) = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: -1.1
σ: 1.0
ρ: Exponential{Float64}(θ=1.1)
)

winningrate(distx, disty) = 0.47829085338031047
a = tieshift(distx, disty) = -0.04882798316870562
winningrate(distx + a, disty) = 0.49999999999999994

Out[2]:

In [3]:

@show distx = Exponential(1)
@show disty = Exponential(1.1)
@show distx = distx - median(distx)
@show disty = disty - median(disty)
@show winningrate(distx, disty)
@show a = tieshift(distx, disty)
@show winningrate(distx + a, disty);

distx = Exponential(1) = Exponential{Float64}(θ=1.0)
disty = Exponential(1.1) = Exponential{Float64}(θ=1.1)
distx = distx - median(distx) = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: -0.6931471805599453
σ: 1.0
ρ: Exponential{Float64}(θ=1.0)
)

disty = disty - median(disty) = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: -0.7624618986159398
σ: 1.0
ρ: Exponential{Float64}(θ=1.1)
)

winningrate(distx, disty) = 0.491820953163202
a = tieshift(distx, disty) = -0.01814270122470007
winningrate(distx + a, disty) = 0.5

In [4]:

@show distx = LogNormal()
@show disty = 1.1LogNormal()
@show distx = distx - mean(distx)
@show disty = disty - mean(disty)
@show winningrate(distx, disty)
@show a = tieshift(distx, disty)
@show winningrate(distx + a, disty);

distx = LogNormal() = LogNormal{Float64}(μ=0.0, σ=1.0)
disty = 1.1 * LogNormal() = LocationScale{Float64, Continuous, LogNormal{Float64}}(
μ: 0.0
σ: 1.1
ρ: LogNormal{Float64}(μ=0.0, σ=1.0)
)

distx = distx - mean(distx) = LocationScale{Float64, Continuous, LogNormal{Float64}}(
μ: -1.6487212707001282
σ: 1.0
ρ: LogNormal{Float64}(μ=0.0, σ=1.0)
)

disty = disty - mean(disty) = LocationScale{Float64, Continuous, LogNormal{Float64}}(
μ: -1.813593397770141
σ: 1.1
ρ: LogNormal{Float64}(μ=0.0, σ=1.0)
)

winningrate(distx, disty) = 0.47076014690097634
a = tieshift(distx, disty) = -0.08680884653722044
winningrate(distx + a, disty) = 0.5

In [5]:

@show distx = Exponential(1)
@show disty = Exponential(1.1)
@show distx = distx - median(distx)
@show disty = disty - median(disty)
@show winningrate(distx, disty)
@show a = tieshift(distx, disty)
@show winningrate(distx + a, disty);

distx = Exponential(1) = Exponential{Float64}(θ=1.0)
disty = Exponential(1.1) = Exponential{Float64}(θ=1.1)
distx = distx - median(distx) = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: -0.6931471805599453
σ: 1.0
ρ: Exponential{Float64}(θ=1.0)
)

disty = disty - median(disty) = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: -0.7624618986159398
σ: 1.0
ρ: Exponential{Float64}(θ=1.1)
)

winningrate(distx, disty) = 0.491820953163202
a = tieshift(distx, disty) = -0.01814270122470007
winningrate(distx + a, disty) = 0.5

In [6]:

c(x...) = [x...]

@show sigma1 = c(1, 2, 3, 4, 4, 4, 4)
@show sigma2 = c(4, 4, 4, 4, 3, 2, 1)
@show sigmas1 = @. sqrt(log((1 + sqrt((4 * sigma1^2)/25 + 1))/2))
@show sigmas2 = @. sqrt(log((1 + sqrt((4 * sigma2^2)/25 + 1))/2))
@show mu1 = log(5)
@show mu2 = log(5)
@show lognormals1 = LogNormal.(mu1, sigmas1)
@show lognormals2 = LogNormal.(mu2, sigmas2)
@show median.(lognormals1)
@show median.(lognormals2)
@show winningrate.(lognormals1, lognormals2)
@show tieshift.(lognormals1, lognormals2);

sigma1 = c(1, 2, 3, 4, 4, 4, 4) = [1, 2, 3, 4, 4, 4, 4]
sigma2 = c(4, 4, 4, 4, 3, 2, 1) = [4, 4, 4, 4, 3, 2, 1]
sigmas1 = #= In[6]:5 =# @__dot__(sqrt(log((1 + sqrt((4 * sigma1 ^ 2) / 25 + 1)) / 2))) = [0.19440481973705104, 0.36235656579549336, 0.4976550144706184, 0.6058053602170826, 0.6058053602170826, 0.6058053602170826, 0.6058053602170826]
sigmas2 = #= In[6]:6 =# @__dot__(sqrt(log((1 + sqrt((4 * sigma2 ^ 2) / 25 + 1)) / 2))) = [0.6058053602170826, 0.6058053602170826, 0.6058053602170826, 0.6058053602170826, 0.4976550144706184, 0.36235656579549336, 0.19440481973705104]
mu1 = log(5) = 1.6094379124341003
mu2 = log(5) = 1.6094379124341003
lognormals1 = LogNormal.(mu1, sigmas1) = LogNormal{Float64}[LogNormal{Float64}(μ=1.6094379124341003, σ=0.19440481973705104), LogNormal{Float64}(μ=1.6094379124341003, σ=0.36235656579549336), LogNormal{Float64}(μ=1.6094379124341003, σ=0.4976550144706184), LogNormal{Float64}(μ=1.6094379124341003, σ=0.6058053602170826), LogNormal{Float64}(μ=1.6094379124341003, σ=0.6058053602170826), LogNormal{Float64}(μ=1.6094379124341003, σ=0.6058053602170826), LogNormal{Float64}(μ=1.6094379124341003, σ=0.6058053602170826)]
lognormals2 = LogNormal.(mu2, sigmas2) = LogNormal{Float64}[LogNormal{Float64}(μ=1.6094379124341003, σ=0.6058053602170826), LogNormal{Float64}(μ=1.6094379124341003, σ=0.6058053602170826), LogNormal{Float64}(μ=1.6094379124341003, σ=0.6058053602170826), LogNormal{Float64}(μ=1.6094379124341003, σ=0.6058053602170826), LogNormal{Float64}(μ=1.6094379124341003, σ=0.4976550144706184), LogNormal{Float64}(μ=1.6094379124341003, σ=0.36235656579549336), LogNormal{Float64}(μ=1.6094379124341003, σ=0.19440481973705104)]
median.(lognormals1) = [4.999999999999999, 4.999999999999999, 4.999999999999999, 4.999999999999999, 4.999999999999999, 4.999999999999999, 4.999999999999999]
median.(lognormals2) = [4.999999999999999, 4.999999999999999, 4.999999999999999, 4.999999999999999, 4.999999999999999, 4.999999999999999, 4.999999999999999]
winningrate.(lognormals1, lognormals2) = [0.5000000000000007, 0.4999999999999974, 0.49999999999999734, 0.4999999999999974, 0.5000000000000001, 0.5000000000000002, 0.5]
tieshift.(lognormals1, lognormals2) = [0.0, -2.1993057631542488e-14, -2.431582788827642e-14, -2.555874980835436e-14, 0.0, 0.0, 0.0]

In [7]:

using Distributions

safediv(x, y) = x == 0 ? x : isinf(y) ? zero(y) : x/y

"""
    h_brunner_munzel(x, y)

この函数は, x < y のとき 1.0 を, x = y のとき 0.5 を, それら以外のとき 0.0 返す.
"""
h_brunner_munzel(x, y) = (x < y) + (x == y)/2

@doc raw"""
    brunner_munzel_test(X, Y; p = 1/2)

この函数は数値のベクトルのデータ `X`, `Y` について, Brunner-Munzel検定関係の統計量達を計算する. 詳細は以下の通り.

函数 $H(x, y)$ と $\hat{p}$, $H^x_i$, $H^y_j$, $\bar{H}^x$, $\bar{H}^y$ を次のように定める:

```math
\begin{aligned}
&
m = \mathrm{length}(X), \quad
n = \mathrm{length}(Y), \quad
x_i = X[i], \quad
y_j = Y[j],
\\ &
\hat{p} = \frac{1}{mn}\sum_{i=1}^m \sum_{j=1}^n H(x_i, y_j),
\\ &
H(x, y) = \begin{cases} 1 & (x < y) \\ 1/2 & (x = y) \\ 0 & (x > y), \end{cases}
\\ &
H^x_i = \sum_{j=1}^n H(y_j, x_i), \quad
H^y_j = \sum_{i=1}^m H(x_i, y_j),
\\ &
\bar{H}^x = \frac{1}{m} \sum_{i=1}^m H^x_i = n - n\hat{p},
\\ &
\bar{H}^y = \frac{1}{n} \sum_{j=1}^n H^y_j = m\hat{p}.
\end{aligned}
```

この函数は以下達の named tuple で返す:

```math
\begin{aligned}
&
\mathrm{phat} = 
\hat{p} = \frac{\bar{H}^y - \bar{H}^x + n}{m + n},
\\ &
\mathrm{sx2} =
\hat{\sigma}_x^2 = \frac{1}{n^2}\frac{1}{m-1}\sum_{i=1}^m (H^x_i - \bar{H}^x)^2,
\\ &
\mathrm{sy2} =
\hat{\sigma}_y^2 = \frac{1}{m^2}\frac{1}{n-1}\sum_{j=1}^n (H^y_j - \bar{H}^y)^2,
\\ &
\mathrm{sehat} = 
\widehat{\mathrm{se}} = \sqrt{\frac{\hat{\sigma}_x^2}{m} + \frac{\hat{\sigma}_y^2}{n}}, 
\\ &
\mathrm{tvalue} = t = \frac{\hat{p} - p}{\widehat{\mathrm{se}}},
\\ &
\mathrm{df} =
\nu = 
\frac
{\left(\hat{\sigma}_x^2/m + \hat{\sigma}_y^2/n\right)^2}
{
\dfrac{\left(\hat{\sigma}_x^2/m\right)^2}{m-1} +
\dfrac{\left(\hat{\sigma}_y^2/n\right)^2}{n-1}
},
\\ &
\mathrm{pvalue} =
2\mathrm{ccdf}(\mathrm{TDist}(\nu), |t|),
\\ &
\mathrm{p} = p.
\end{aligned}
```
"""
function brunner_munzel_test(X, Y; p=1/2)
    m, n = length(X), length(Y)
    phat = mean(h_brunner_munzel(x, y) for x in X, y in Y)
    Hbarx = n*(1 - phat)
    Hbary = m*phat
    sx2 = 1/n^2 * 1/(m-1) * sum(x -> (sum(h_brunner_munzel(y, x) for y in Y) - Hbarx)^2, X)
    sy2 = 1/m^2 * 1/(n-1) * sum(y -> (sum(h_brunner_munzel(x, y) for x in X) - Hbary)^2, Y)
    sehat = √(sx2/m + sy2/n)
    tvalue = (phat - p)/sehat
    df = (sx2/m + sy2/n)^2 / ((sx2/m)^2/(m-1) + (sy2/n)^2/(n-1))
    pvalue = 2ccdf(TDist(df), abs(tvalue))
    (; phat, sehat, tvalue, df, pvalue, p)
end

@doc raw"""
    pvalue_brunner_munzel_test(X, Y; p = 1/2)

この函数はBrunner-Munzel検定のP値 `pvalue` を返す.
"""
pvalue_brunner_munzel_test(X, Y; p=1/2) = brunner_munzel_test(X, Y; p).pvalue

Out[7]:

pvalue_brunner_munzel_test

In [8]:

function mann_whitney_u_test(X, Y; correct=true)
    m, n = length(X), length(Y)
    phat = mean((x < y) + (x == y)/2 for x in X, y in Y)
    sehat = √((m+n+1)/(12m*n))
    zvalue = (phat - 1/2)/sehat
    correction = correct/(2m*n*sehat)
    pvalue = 2ccdf(Normal(), max(0, abs(zvalue) - correction))
    (; phat, sehat, zvalue, pvalue)
end

pvalue_mann_whitney_u_test(X, Y; correct=true) = mann_whitney_u_test(X, Y; correct).pvalue

Out[8]:

pvalue_mann_whitney_u_test (generic function with 1 method)

In [9]:

using HypothesisTests

X = rand(30, 20)
Y = rand(30, 20)
pval1 = [HypothesisTests.pvalue(MannWhitneyUTest(x, y)) for (x, y) in zip(eachcol(X), eachcol(Y))]
pval2 = [pvalue_mann_whitney_u_test(x, y) for (x, y) in zip(eachcol(X), eachcol(Y))]
pval3 = [pvalue_mann_whitney_u_test(x, y; correct=false) for (x, y) in zip(eachcol(X), eachcol(Y))]
[pval1 pval2 pval3 pval1-pval2 pval1-pval3]

Out[9]:

20×5 Matrix{Float64}:
 0.652044    0.652044    0.646725     5.55112e-16  0.00531905
 0.53951     0.53951     0.534635     2.22045e-16  0.00487508
 0.195791    0.195791    0.193248     1.11022e-16  0.00254238
 0.830255    0.830255    0.824496    -1.11022e-16  0.00575953
 0.510598    0.510598    0.505859    -2.22045e-16  0.00473876
 0.0701266   0.0701266   0.0689901    9.71445e-17  0.00113645
 0.510598    0.510598    0.505859    -2.22045e-16  0.00473876
 0.970516    0.970516    0.964623     2.22045e-16  0.00589325
 0.795846    0.795846    0.790147     2.22045e-16  0.00569849
 0.211561    0.211561    0.208871     0.0          0.00269031
 0.222573    0.222573    0.219782    -5.55112e-17  0.00279064
 0.270705    0.270705    0.267503    -1.66533e-16  0.00320263
 0.610008    0.610008    0.604839    -2.22045e-16  0.0051689
 0.332855    0.332855    0.329177     1.66533e-16  0.00367727
 0.00831461  0.00831461  0.00813506   0.0          0.000179546
 0.888303    0.888303    0.882466     2.22045e-16  0.00583716
 0.185767    0.185767    0.183321    -1.38778e-16  0.00244563
 0.290472    0.290472    0.287112     1.11022e-16  0.00336022
 0.559231    0.559231    0.554268    -4.44089e-16  0.0049627
 0.318304    0.318304    0.314733     1.11022e-16  0.00357151

In [10]:

using HypothesisTests

X = rand(1000, 20)
Y = rand(1000, 20)
pval1 = [HypothesisTests.pvalue(MannWhitneyUTest(x, y)) for (x, y) in zip(eachcol(X), eachcol(Y))]
pval2 = [pvalue_mann_whitney_u_test(x, y) for (x, y) in zip(eachcol(X), eachcol(Y))]
pval3 = [pvalue_mann_whitney_u_test(x, y; correct=false) for (x, y) in zip(eachcol(X), eachcol(Y))]
[pval1 pval2 pval3 pval1-pval2 pval1-pval3]

Out[10]:

20×5 Matrix{Float64}:
 0.604703   0.604703   0.604676   -2.22045e-16  2.702e-5
 0.61441    0.61441    0.614383    6.66134e-16  2.7212e-5
 0.11235    0.11235    0.112341   -8.32667e-17  8.75926e-6
 0.59201    0.59201    0.591983   -2.55351e-15  2.67611e-5
 0.0562795  0.0562795  0.0562745  -4.92661e-16  4.99606e-6
 0.167388   0.167388   0.167376   -4.16334e-16  1.19109e-5
 0.549456   0.549456   0.549431    6.66134e-16  2.58268e-5
 0.9697     0.9697     0.969669    1.22125e-15  3.08719e-5
 0.80929    0.80929    0.809259   -1.22125e-15  3.00073e-5
 0.510457   0.510457   0.510432   -9.99201e-16  2.48782e-5
 0.843912   0.843912   0.843882   -2.33147e-15  3.0301e-5
 0.390636   0.390636   0.390615    1.60982e-15  2.13718e-5
 0.790204   0.790204   0.790174   -3.33067e-16  2.98198e-5
 0.491295   0.491295   0.491271    2.77556e-16  2.43787e-5
 0.966675   0.966675   0.966644   -8.88178e-16  3.08672e-5
 0.932146   0.932146   0.932115   -5.55112e-16  3.07824e-5
 0.0617762  0.0617762  0.0617708  -5.82867e-16  5.3979e-6
 0.814395   0.814395   0.814365    1.9984e-15   3.00544e-5
 0.417236   0.417236   0.417214   -7.21645e-16  2.22315e-5
 0.0516597  0.0516597  0.051655    1.04083e-16  4.65133e-6

In [11]:

X = rand(100)
Y = rand(100)
@time @show brunner_munzel_test(X, Y)
@time @show mann_whitney_u_test(X, Y);

brunner_munzel_test(X, Y) = (phat = 0.4877, sehat = 0.04117165593615652, tvalue = -0.29874921764315643, df = 197.9099321253469, pvalue = 0.7654448527620511, p = 0.5)
  0.889693 seconds (1.01 M allocations: 66.787 MiB, 1.67% gc time, 99.97% compilation time)
mann_whitney_u_test(X, Y) = (phat = 0.4877, sehat = 0.04092676385936225, zvalue = -0.3005368331165103, pvalue = 0.7646996047810375)
  0.051069 seconds (64.91 k allocations: 4.397 MiB, 99.41% compilation time)

In [12]:

distx = Exponential(1)
disty = Exponential(3)
distx = distx - mean(distx)
disty = disty - mean(disty)
a = minimum(quantile.((distx, disty), 0.01))
b = maximum(quantile.((distx, disty), 0.99))
a, b

Out[12]:

(-2.9698489924394957, 10.815510557964274)

In [13]:

using HypothesisTests
ECDF(A, x) = count(≤(x), A)/length(A)
using StatsPlots
default(fmt=:png)
using Random

function plot_sim_bm_mw(;
        distx = Exponential(1),
        disty = Exponential(3),
        xlim = :auto,
        ylim = :auto,
        m = 100,
        n = 100,
        L = 10^6,
        shifttype = :median,
        α = 0.05,
        legend = :bottomright,
        correct = true,
    )
    @show distx
    @show disty
    @show tiesh = tieshift(distx, disty)
    @show mediansh = median(disty) - median(distx)
    @show meansh = mean(disty) - mean(distx)
    println()
    @show shifttype
    if shifttype == :tie
        @show distx = distx + tiesh
    elseif shifttype == :median
        @show distx = distx + mediansh
    elseif shifttype == :mean
        @show distx = distx + meansh
    end
    @show winningrate(distx, disty)
    @show median(distx) median(disty)
    @show mean(distx) mean(disty)

    if xlim == :auto
        a = minimum(quantile.((distx, disty), 0.01))
        b = maximum(quantile.((distx, disty), 0.99))
        l = b - a
        a = a - l/10
        b = b + l/10
    else
        a, b = xlim
    end
    P1 = plot(distx, a, b; label="distx")
    plot!(disty, a, b; label="disty", ls=:dash)
    plot!(; ylim)
    title!("m = $m,  n = $n"; titlefontsize=10)

    @show m
    @show n
    println()

    pval_bm = zeros(L)
    pval_mw = zeros(L)

    nth = Threads.nthreads()
    Xtmp = [zeros(m) for _ in 1:nth]
    Ytmp = [zeros(n) for _ in 1:nth]
    
    @time Threads.@threads for i in 1:L
        tid = Threads.threadid()
        X = rand!(distx, Xtmp[tid])
        Y = rand!(disty, Ytmp[tid])
        pval_bm[i] = pvalue_brunner_munzel_test(X, Y)
        pval_mw[i] = pvalue_mann_whitney_u_test(X, Y; correct)
    end

    @show α
    @show ECDF(pval_bm, α)
    @show ECDF(pval_mw, α)

    _tick = [0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1]
    xtick = ytick = (_tick, string.(_tick))

    αs = exp.(range(log(0.002), log(1), 1000))
    P2 = plot(αs, α -> ECDF(pval_bm, α); label="Brunner-Munzel test")
    plot!(αs, α -> ECDF(pval_mw, α); label="Mann-Whitney U-test", ls=:dash)
    plot!(αs, identity; label="", ls=:dot, c=:gray)
    plot!(αs, x->0.8x; label="", ls=:dot, c=:gray)
    plot!(αs, x->1.2x; label="", ls=:dot, c=:gray)
    plot!(; xscale=:log10, yscale=:log10, xtick, ytick)
    plot!(; legend)
    
    plot(P1, P2; size=(400, 570), layout=@layout [a{0.3h}; b])
    plot!(leftmargin=4Plots.mm)
end

Out[13]:

plot_sim_bm_mw (generic function with 1 method)

In [14]:

plot_sim_bm_mw(; m=100, n=100, shifttype=:tie)

distx = Exponential{Float64}(θ=1.0)
disty = Exponential{Float64}(θ=3.0)
tiesh = tieshift(distx, disty) = 1.2163953243549488
mediansh = median(disty) - median(distx) = 1.3862943611198904
meansh = mean(disty) - mean(distx) = 2.0

shifttype = :tie
distx = distx + tiesh = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: 1.2163953243549488
σ: 1.0
ρ: Exponential{Float64}(θ=1.0)
)

winningrate(distx, disty) = 0.5
median(distx) = 1.909542504914894
median(disty) = 2.0794415416798357
mean(distx) = 2.2163953243549486
mean(disty) = 3.0
m = 100
n = 100

  6.211002 seconds (5.87 M allocations: 276.589 MiB, 31.71% compilation time)
α = 0.05
ECDF(pval_bm, α) = 0.050721
ECDF(pval_mw, α) = 0.070232

Out[14]:

In [15]:

plot_sim_bm_mw(; m=100, n=100, shifttype=:median)

distx = Exponential{Float64}(θ=1.0)
disty = Exponential{Float64}(θ=3.0)
tiesh = tieshift(distx, disty) = 1.2163953243549488
mediansh = median(disty) - median(distx) = 1.3862943611198904
meansh = mean(disty) - mean(distx) = 2.0

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: 1.3862943611198904
σ: 1.0
ρ: Exponential{Float64}(θ=1.0)
)

winningrate(distx, disty) = 0.47247039320177775
median(distx) = 2.0794415416798357
median(disty) = 2.0794415416798357
mean(distx) = 2.3862943611198904
mean(disty) = 3.0
m = 100
n = 100

  6.252474 seconds (5.82 M allocations: 313.436 MiB, 0.89% gc time)
α = 0.05
ECDF(pval_bm, α) = 0.095378
ECDF(pval_mw, α) = 0.125446

Out[15]:

In [16]:

plot_sim_bm_mw(; m=100, n=100, shifttype=:mean)

distx = Exponential{Float64}(θ=1.0)
disty = Exponential{Float64}(θ=3.0)
tiesh = tieshift(distx, disty) = 1.2163953243549488
mediansh = median(disty) - median(distx) = 1.3862943611198904
meansh = mean(disty) - mean(distx) = 2.0

shifttype = :mean
distx = distx + meansh = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: 2.0
σ: 1.0
ρ: Exponential{Float64}(θ=1.0)
)

winningrate(distx, disty) = 0.3850628403942911
median(distx) = 2.6931471805599454
median(disty) = 2.0794415416798357
mean(distx) = 3.0
mean(disty) = 3.0
m = 100
n = 100

  6.831852 seconds (6.89 M allocations: 623.154 MiB, 0.80% gc time)
α = 0.05
ECDF(pval_bm, α) = 0.728763
ECDF(pval_mw, α) = 0.786573

Out[16]:

In [17]:

plot_sim_bm_mw(; m=200, n=200, shifttype=:median, L=10^5)

distx = Exponential{Float64}(θ=1.0)
disty = Exponential{Float64}(θ=3.0)
tiesh = tieshift(distx, disty) = 1.2163953243549488
mediansh = median(disty) - median(distx) = 1.3862943611198904
meansh = mean(disty) - mean(distx) = 2.0

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: 1.3862943611198904
σ: 1.0
ρ: Exponential{Float64}(θ=1.0)
)

winningrate(distx, disty) = 0.47247039320177775
median(distx) = 2.0794415416798357
median(disty) = 2.0794415416798357
mean(distx) = 2.3862943611198904
mean(disty) = 3.0
m = 200
n = 200

  2.407584 seconds (595.85 k allocations: 35.661 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.13955
ECDF(pval_mw, α) = 0.1787

Out[17]:

In [18]:

plot_sim_bm_mw(; m=500, n=500, shifttype=:median, L=5*10^4)

distx = Exponential{Float64}(θ=1.0)
disty = Exponential{Float64}(θ=3.0)
tiesh = tieshift(distx, disty) = 1.2163953243549488
mediansh = median(disty) - median(distx) = 1.3862943611198904
meansh = mean(disty) - mean(distx) = 2.0

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: 1.3862943611198904
σ: 1.0
ρ: Exponential{Float64}(θ=1.0)
)

winningrate(distx, disty) = 0.47247039320177775
median(distx) = 2.0794415416798357
median(disty) = 2.0794415416798357
mean(distx) = 2.3862943611198904
mean(disty) = 3.0
m = 500
n = 500

  7.322873 seconds (315.59 k allocations: 23.073 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.28222
ECDF(pval_mw, α) = 0.33782

Out[18]:

In [19]:

plot_sim_bm_mw(; m=1000, n=1000, shifttype=:median, L=10^4)

distx = Exponential{Float64}(θ=1.0)
disty = Exponential{Float64}(θ=3.0)
tiesh = tieshift(distx, disty) = 1.2163953243549488
mediansh = median(disty) - median(distx) = 1.3862943611198904
meansh = mean(disty) - mean(distx) = 2.0

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Exponential{Float64}}(
μ: 1.3862943611198904
σ: 1.0
ρ: Exponential{Float64}(θ=1.0)
)

winningrate(distx, disty) = 0.47247039320177775
median(distx) = 2.0794415416798357
median(disty) = 2.0794415416798357
mean(distx) = 2.3862943611198904
mean(disty) = 3.0
m = 1000
n = 1000

  5.767039 seconds (65.86 k allocations: 5.605 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.5084
ECDF(pval_mw, α) = 0.5693

Out[19]:

In [20]:

"""
gammadist(σ, β)

returns the gamma distribution with standard deviation `σ` and skewness `β`.
"""
gammadist(σ, β) = Gamma(4/β^2, β*σ/2)

Out[20]:

gammadist

In [21]:

gam = gammadist.(1:5, 2:2:10)
std.(gam), skewness.(gam)

Out[21]:

([1.0, 2.0, 3.0, 4.0, 5.0], [2.0, 4.0, 6.0, 8.0, 10.0])

In [22]:

[(β, shape(gammadist(1, β)), gammadist(1, β)) for β in 0.2:0.2:3]

Out[22]:

15-element Vector{Tuple{Float64, Float64, Gamma{Float64}}}:
 (0.2, 99.99999999999999, Gamma{Float64}(α=99.99999999999999, θ=0.1))
 (0.4, 24.999999999999996, Gamma{Float64}(α=24.999999999999996, θ=0.2))
 (0.6, 11.11111111111111, Gamma{Float64}(α=11.11111111111111, θ=0.3))
 (0.8, 6.249999999999999, Gamma{Float64}(α=6.249999999999999, θ=0.4))
 (1.0, 4.0, Gamma{Float64}(α=4.0, θ=0.5))
 (1.2, 2.7777777777777777, Gamma{Float64}(α=2.7777777777777777, θ=0.6))
 (1.4, 2.0408163265306127, Gamma{Float64}(α=2.0408163265306127, θ=0.7))
 (1.6, 1.5624999999999998, Gamma{Float64}(α=1.5624999999999998, θ=0.8))
 (1.8, 1.2345679012345678, Gamma{Float64}(α=1.2345679012345678, θ=0.9))
 (2.0, 1.0, Gamma{Float64}(α=1.0, θ=1.0))
 (2.2, 0.8264462809917354, Gamma{Float64}(α=0.8264462809917354, θ=1.1))
 (2.4, 0.6944444444444444, Gamma{Float64}(α=0.6944444444444444, θ=1.2))
 (2.6, 0.5917159763313609, Gamma{Float64}(α=0.5917159763313609, θ=1.3))
 (2.8, 0.5102040816326532, Gamma{Float64}(α=0.5102040816326532, θ=1.4))
 (3.0, 0.4444444444444444, Gamma{Float64}(α=0.4444444444444444, θ=1.5))

In [23]:

P = plot()
for β in [0.4; 1:3]
    dist = gammadist(1, β)
    @eval @show (mean(gammadist(1, $β)), median(gammadist(1, $β)), shape(gammadist(1, $β)))
    dist = dist - median(dist)
    plot!(dist, -3, 3; label="$β", ls=:auto)
    plot!(ylim=(-0.05, 2.05))
end
plot!()

(mean(gammadist(1, 0.4)), median(gammadist(1, 0.4)), shape(gammadist(1, 0.4))) = (5.0, 4.933493673397683, 24.999999999999996)
(mean(gammadist(1, 1.0)), median(gammadist(1, 1.0)), shape(gammadist(1, 1.0))) = (2.0, 1.8360303744254478, 4.0)
(mean(gammadist(1, 2.0)), median(gammadist(1, 2.0)), shape(gammadist(1, 2.0))) = (1.0, 0.6931471805599451, 1.0)
(mean(gammadist(1, 3.0)), median(gammadist(1, 3.0)), shape(gammadist(1, 3.0))) = (0.6666666666666666, 0.2711292144816104, 0.4444444444444444)

Out[23]:

In [24]:

distx = gammadist(1, 2.5)
disty = gammadist(1, 3)
xlim = (-1.0, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:median, L=10^5)

std(distx) = 1.0
std(disty) = 1.0
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=1.25)
disty = Gamma{Float64}(α=0.4444444444444444, θ=1.5)
tiesh = tieshift(distx, disty) = -0.08777242018131629
mediansh = median(disty) - median(distx) = -0.16894628370998221
meansh = mean(disty) - mean(distx) = -0.13333333333333341

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: -0.16894628370998221
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=1.25)
)

winningrate(distx, disty) = 0.5509427374639214
median(distx) = 0.2711292144816104
median(disty) = 0.2711292144816104
mean(distx) = 0.6310537162900178
mean(disty) = 0.6666666666666666
m = 100
n = 100

  0.849446 seconds (711.46 k allocations: 48.771 MiB, 131.71% compilation time)
α = 0.05
ECDF(pval_bm, α) = 0.23268
ECDF(pval_mw, α) = 0.24122

Out[24]:

In [25]:

distx = gammadist(1, 2.5)
disty = gammadist(1, 3)
xlim = (-1.0, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:tie, L=10^5)

std(distx) = 1.0
std(disty) = 1.0
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=1.25)
disty = Gamma{Float64}(α=0.4444444444444444, θ=1.5)
tiesh = tieshift(distx, disty) = -0.08777242018131629
mediansh = median(disty) - median(distx) = -0.16894628370998221
meansh = mean(disty) - mean(distx) = -0.13333333333333341

shifttype = :tie
distx = distx + tiesh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: -0.08777242018131629
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=1.25)
)

winningrate(distx, disty) = 0.5
median(distx) = 0.3523030780102764
median(disty) = 0.2711292144816104
mean(distx) = 0.7122275798186838
mean(disty) = 0.6666666666666666
m = 100
n = 100

  0.738244 seconds (563.51 k allocations: 26.283 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.05076
ECDF(pval_mw, α) = 0.05254

Out[25]:

In [26]:

distx = gammadist(1, 2.5)
disty = gammadist(1.5, 3)
xlim = (-1.0, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:median, L=10^5)

std(distx) = 1.0
std(disty) = 1.5
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=1.25)
disty = Gamma{Float64}(α=0.4444444444444444, θ=2.25)
tiesh = tieshift(distx, disty) = -0.009663713334820828
mediansh = median(disty) - median(distx) = -0.03338167646917706
meansh = mean(disty) - mean(distx) = 0.19999999999999996

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: -0.03338167646917706
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=1.25)
)

winningrate(distx, disty) = 0.5197129615903453
median(distx) = 0.4066938217224156
median(disty) = 0.4066938217224156
mean(distx) = 0.766618323530823
mean(disty) = 1.0
m = 100
n = 100

  0.736813 seconds (574.04 k allocations: 29.338 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.07527
ECDF(pval_mw, α) = 0.07435

Out[26]:

In [27]:

distx = gammadist(1, 2.5)
disty = gammadist(1.5, 3)
xlim = (-1.0, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:tie, L=10^5)

std(distx) = 1.0
std(disty) = 1.5
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=1.25)
disty = Gamma{Float64}(α=0.4444444444444444, θ=2.25)
tiesh = tieshift(distx, disty) = -0.009663713334820828
mediansh = median(disty) - median(distx) = -0.03338167646917706
meansh = mean(disty) - mean(distx) = 0.19999999999999996

shifttype = :tie
distx = distx + tiesh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: -0.009663713334820828
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=1.25)
)

winningrate(distx, disty) = 0.5
median(distx) = 0.4304117848567718
median(disty) = 0.4066938217224156
mean(distx) = 0.7903362866651792
mean(disty) = 1.0
m = 100
n = 100

  0.761795 seconds (563.33 k allocations: 26.237 MiB, 3.80% gc time)
α = 0.05
ECDF(pval_bm, α) = 0.0499
ECDF(pval_mw, α) = 0.0501

Out[27]:

In [28]:

distx = gammadist(1, 2.5)
disty = gammadist(2, 3)
xlim = (-0.5, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:median, L=10^5)

std(distx) = 1.0
std(disty) = 2.0
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=1.25)
disty = Gamma{Float64}(α=0.4444444444444444, θ=3.0)
tiesh = tieshift(distx, disty) = 0.05779789017907937
mediansh = median(disty) - median(distx) = 0.10218293077162821
meansh = mean(disty) - mean(distx) = 0.5333333333333332

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: 0.10218293077162821
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=1.25)
)

winningrate(distx, disty) = 0.47933127535571446
median(distx) = 0.5422584289632209
median(disty) = 0.5422584289632209
mean(distx) = 0.9021829307716283
mean(disty) = 1.3333333333333333
m = 100
n = 100

  0.754197 seconds (574.45 k allocations: 29.456 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.07883
ECDF(pval_mw, α) = 0.08699

Out[28]:

In [29]:

distx = gammadist(1, 2.5)
disty = gammadist(2, 3)
xlim = (-0.5, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:tie, L=10^5)

std(distx) = 1.0
std(disty) = 2.0
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=1.25)
disty = Gamma{Float64}(α=0.4444444444444444, θ=3.0)
tiesh = tieshift(distx, disty) = 0.05779789017907937
mediansh = median(disty) - median(distx) = 0.10218293077162821
meansh = mean(disty) - mean(distx) = 0.5333333333333332

shifttype = :tie
distx = distx + tiesh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: 0.05779789017907937
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=1.25)
)

winningrate(distx, disty) = 0.5
median(distx) = 0.497873388370672
median(disty) = 0.5422584289632209
mean(distx) = 0.8577978901790794
mean(disty) = 1.3333333333333333
m = 100
n = 100

  0.775140 seconds (563.27 k allocations: 26.217 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.05057
ECDF(pval_mw, α) = 0.05505

Out[29]:

In [30]:

distx = gammadist(1.5, 2.5)
disty = gammadist(1, 3)
xlim = (-1, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:median, L=10^5)

std(distx) = 1.5
std(disty) = 1.0
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=1.875)
disty = Gamma{Float64}(α=0.4444444444444444, θ=1.5)
tiesh = tieshift(distx, disty) = -0.24745995664908638
mediansh = median(disty) - median(distx) = -0.38898403280577853
meansh = mean(disty) - mean(distx) = -0.5333333333333333

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: -0.38898403280577853
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=1.875)
)

winningrate(distx, disty) = 0.5550759373024472
median(distx) = 0.2711292144816104
median(disty) = 0.2711292144816104
mean(distx) = 0.8110159671942214
mean(disty) = 0.6666666666666666
m = 100
n = 100

  0.749274 seconds (623.99 k allocations: 43.794 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.25063
ECDF(pval_mw, α) = 0.2809

Out[30]:

In [31]:

distx = gammadist(1.5, 2.5)
disty = gammadist(1, 3)
xlim = (-1, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:tie, L=10^5)

std(distx) = 1.5
std(disty) = 1.0
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=1.875)
disty = Gamma{Float64}(α=0.4444444444444444, θ=1.5)
tiesh = tieshift(distx, disty) = -0.24745995664908638
mediansh = median(disty) - median(distx) = -0.38898403280577853
meansh = mean(disty) - mean(distx) = -0.5333333333333333

shifttype = :tie
distx = distx + tiesh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: -0.24745995664908638
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=1.875)
)

winningrate(distx, disty) = 0.5
median(distx) = 0.4126532906383026
median(disty) = 0.2711292144816104
mean(distx) = 0.9525400433509136
mean(disty) = 0.6666666666666666
m = 100
n = 100

  0.790268 seconds (563.63 k allocations: 26.322 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.0508
ECDF(pval_mw, α) = 0.05861

Out[31]:

In [32]:

distx = gammadist(2, 2.5)
disty = gammadist(1, 3)
xlim = (-1, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:median, L=10^5)

std(distx) = 2.0
std(disty) = 1.0
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=2.5)
disty = Gamma{Float64}(α=0.4444444444444444, θ=1.5)
tiesh = tieshift(distx, disty) = -0.4281203990096133
mediansh = median(disty) - median(distx) = -0.6090217819015749
meansh = mean(disty) - mean(distx) = -0.9333333333333335

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: -0.6090217819015749
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=2.5)
)

winningrate(distx, disty) = 0.5517130376525936
median(distx) = 0.2711292144816104
median(disty) = 0.2711292144816104
mean(distx) = 0.9909782180984252
mean(disty) = 0.6666666666666666
m = 100
n = 100

  0.763029 seconds (616.50 k allocations: 41.671 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.21693
ECDF(pval_mw, α) = 0.25952

Out[32]:

In [33]:

distx = gammadist(2, 2.5)
disty = gammadist(1, 3)
xlim = (-1.0, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:tie, L=10^5)

std(distx) = 2.0
std(disty) = 1.0
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=2.5)
disty = Gamma{Float64}(α=0.4444444444444444, θ=1.5)
tiesh = tieshift(distx, disty) = -0.4281203990096133
mediansh = median(disty) - median(distx) = -0.6090217819015749
meansh = mean(disty) - mean(distx) = -0.9333333333333335

shifttype = :tie
distx = distx + tiesh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: -0.4281203990096133
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=2.5)
)

winningrate(distx, disty) = 0.5
median(distx) = 0.452030597373572
median(disty) = 0.2711292144816104
mean(distx) = 1.171879600990387
mean(disty) = 0.6666666666666666
m = 100
n = 100

  0.737656 seconds (562.70 k allocations: 26.053 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.05055
ECDF(pval_mw, α) = 0.06395

Out[33]:

In [34]:

distx = gammadist(3, 2.5)
disty = gammadist(1, 3)
xlim = (-1.5, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:median, L=10^5)

std(distx) = 3.0
std(disty) = 1.0
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=3.75)
disty = Gamma{Float64}(α=0.4444444444444444, θ=1.5)
tiesh = tieshift(distx, disty) = -0.8188418791321946
mediansh = median(disty) - median(distx) = -1.0490972800931675
meansh = mean(disty) - mean(distx) = -1.7333333333333334

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: -1.0490972800931675
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=3.75)
)

winningrate(distx, disty) = 0.5434241893304153
median(distx) = 0.2711292144816104
median(disty) = 0.2711292144816104
mean(distx) = 1.3509027199068324
mean(disty) = 0.6666666666666666
m = 100
n = 100

  0.784830 seconds (601.61 k allocations: 37.334 MiB, 3.73% gc time)
α = 0.05
ECDF(pval_bm, α) = 0.16138
ECDF(pval_mw, α) = 0.21249

Out[34]:

In [35]:

distx = gammadist(3, 2.5)
disty = gammadist(1, 3)
xlim = (-1.5, 1.5)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:tie, L=10^5)

std(distx) = 3.0
std(disty) = 1.0
skewness(distx) = 2.5
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.64, θ=3.75)
disty = Gamma{Float64}(α=0.4444444444444444, θ=1.5)
tiesh = tieshift(distx, disty) = -0.8188418791321946
mediansh = median(disty) - median(distx) = -1.0490972800931675
meansh = mean(disty) - mean(distx) = -1.7333333333333334

shifttype = :tie
distx = distx + tiesh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: -0.8188418791321946
σ: 1.0
ρ: Gamma{Float64}(α=0.64, θ=3.75)
)

winningrate(distx, disty) = 0.4999999999999999
median(distx) = 0.5013846154425833
median(disty) = 0.2711292144816104
mean(distx) = 1.5811581208678054
mean(disty) = 0.6666666666666666
m = 100
n = 100

  0.741360 seconds (563.52 k allocations: 26.291 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.0499
ECDF(pval_mw, α) = 0.07205

Out[35]:

In [36]:

distx = gammadist(1, 3)
disty = gammadist(2, 3)
xlim = (-1.0, 2.0)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:median, L=10^6, correct=false)

std(distx) = 1.0
std(disty) = 2.0
skewness(distx) = 3.0
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.4444444444444444, θ=1.5)
disty = Gamma{Float64}(α=0.4444444444444444, θ=3.0)
tiesh = tieshift(distx, disty) = 0.15274478695442129
mediansh = median(disty) - median(distx) = 0.2711292144816104
meansh = mean(disty) - mean(distx) = 0.6666666666666666

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: 0.2711292144816104
σ: 1.0
ρ: Gamma{Float64}(α=0.4444444444444444, θ=1.5)
)

winningrate(distx, disty) = 0.45349675319301963
median(distx) = 0.5422584289632209
median(disty) = 0.5422584289632209
mean(distx) = 0.937795881148277
mean(disty) = 1.3333333333333333
m = 100
n = 100

  7.586112 seconds (6.10 M allocations: 396.496 MiB, 0.48% gc time)
α = 0.05
ECDF(pval_bm, α) = 0.189683
ECDF(pval_mw, α) = 0.21931

Out[36]:

In [37]:

distx = gammadist(1, 3)
disty = gammadist(2, 3)
xlim = (-1.0, 2.0)
ylim = (-0.2, 5.2)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, xlim, ylim, m=100, n=100, shifttype=:median, L=10^6, correct=true)

std(distx) = 1.0
std(disty) = 2.0
skewness(distx) = 3.0
skewness(disty) = 3.0
distx = Gamma{Float64}(α=0.4444444444444444, θ=1.5)
disty = Gamma{Float64}(α=0.4444444444444444, θ=3.0)
tiesh = tieshift(distx, disty) = 0.15274478695442129
mediansh = median(disty) - median(distx) = 0.2711292144816104
meansh = mean(disty) - mean(distx) = 0.6666666666666666

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, Gamma{Float64}}(
μ: 0.2711292144816104
σ: 1.0
ρ: Gamma{Float64}(α=0.4444444444444444, θ=1.5)
)

winningrate(distx, disty) = 0.45349675319301963
median(distx) = 0.5422584289632209
median(disty) = 0.5422584289632209
mean(distx) = 0.937795881148277
mean(disty) = 1.3333333333333333
m = 100
n = 100

  7.672621 seconds (6.10 M allocations: 396.472 MiB, 0.60% gc time)
α = 0.05
ECDF(pval_bm, α) = 0.189578
ECDF(pval_mw, α) = 0.219831

Out[37]:

In [38]:

function inversegammadist(σ, β)
    α = 3 + 8/β^2 + 4/β * √(1 + 4/β^2)
    θ = σ * (α - 1) * √(α - 2)
    InverseGamma(α, θ)
end

Out[38]:

inversegammadist (generic function with 1 method)

In [39]:

igam = inversegammadist.(1:5, 2:2:10)
std.(igam), skewness.(igam)

Out[39]:

([0.9999999999999999, 2.0, 3.0, 3.9999999999999996, 5.0], [2.0, 4.0, 6.0, 7.999999999999998, 10.0])

In [40]:

@show igam = inversegammadist.(1, Inf)
std.(igam), skewness.(igam)

igam = inversegammadist.(1, Inf) = InverseGamma{Float64}(
invd: Gamma{Float64}(α=3.0, θ=0.5)
θ: 2.0
)

Out[40]:

(1.0, NaN)

In [49]:

P = plot()
for β in [0.4; 1:3]
    dist = inversegammadist(1, β)
    @show (β, mean(dist), median(dist), shape(dist))
    dist = dist - median(dist)
    plot!(dist, -3, 6; label="$β", ls=:auto)
end
plot!()

(β, mean(dist), median(dist), shape(dist)) = (0.4, 10.099019513592784, 10.034049455289487, 103.99019513592785)
(β, mean(dist), median(dist), shape(dist)) = (1.0, 4.23606797749979, 4.091853950002106, 19.94427190999916)
(β, mean(dist), median(dist), shape(dist)) = (2.0, 2.414213562373095, 2.1987013095619345, 7.82842712474619)
(β, mean(dist), median(dist), shape(dist)) = (3.0, 1.8685170918213296, 1.6258040942425198, 5.4913561224284395)

Out[49]:

In [41]:

distx = inversegammadist(1, 1)
disty = inversegammadist(1, 10)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, m=100, n=100, shifttype=:median, L=10^5)

std(distx) = 1.0
std(disty) = 1.0
skewness(distx) = 1.0
skewness(disty) = 10.0
distx = InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

disty = InverseGamma{Float64}(
invd: Gamma{Float64}(α=3.487921561087423, θ=0.32951356487309685)
θ: 3.034776429872083
)

tiesh = tieshift(distx, disty) = -3.024489446241996
mediansh = median(disty) - median(distx) = -3.131740170881422
meansh = mean(disty) - mean(distx) = -3.016264074781233

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, InverseGamma{Float64}}(
μ: -3.131740170881422
σ: 1.0
ρ: InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

)

winningrate(distx, disty) = 0.5397140117551941
median(distx) = 0.9601137791206837
median(disty) = 0.9601137791206837
mean(distx) = 1.1043278066183677
mean(disty) = 1.219803902718557
m = 100
n = 100

  0.749990 seconds (626.06 k allocations: 38.803 MiB, 61.45% compilation time)
α = 0.05
ECDF(pval_bm, α) = 0.15839
ECDF(pval_mw, α) = 0.1686

Out[41]:

In [50]:

distx = inversegammadist(1, 1)
disty = inversegammadist(1, 10)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, m=100, n=100, shifttype=:mean, L=10^5)

std(distx) = 1.0
std(disty) = 1.0
skewness(distx) = 1.0
skewness(disty) = 10.0
distx = InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

disty = InverseGamma{Float64}(
invd: Gamma{Float64}(α=3.487921561087423, θ=0.32951356487309685)
θ: 3.034776429872083
)

tiesh = tieshift(distx, disty) = -3.024489446241996
mediansh = median(disty) - median(distx) = -3.131740170881422
meansh = mean(disty) - mean(distx) = -3.016264074781233

shifttype = :mean
distx = distx + meansh = LocationScale{Float64, Continuous, InverseGamma{Float64}}(
μ: -3.016264074781233
σ: 1.0
ρ: InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

)

winningrate(distx, disty) = 0.4969294651265493
median(distx) = 1.075589875220873
median(disty) = 0.9601137791206837
mean(distx) = 1.219803902718557
mean(disty) = 1.219803902718557
m = 100
n = 100

  0.625812 seconds (563.50 k allocations: 26.281 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.05188
ECDF(pval_mw, α) = 0.05691

Out[50]:

In [42]:

distx = inversegammadist(1, 1)
disty = inversegammadist(1, 10)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, m=100, n=100, shifttype=:tie, L=10^5)

std(distx) = 1.0
std(disty) = 1.0
skewness(distx) = 1.0
skewness(disty) = 10.0
distx = InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

disty = InverseGamma{Float64}(
invd: Gamma{Float64}(α=3.487921561087423, θ=0.32951356487309685)
θ: 3.034776429872083
)

tiesh = tieshift(distx, disty) = -3.024489446241996
mediansh = median(disty) - median(distx) = -3.131740170881422
meansh = mean(disty) - mean(distx) = -3.016264074781233

shifttype = :tie
distx = distx + tiesh = LocationScale{Float64, Continuous, InverseGamma{Float64}}(
μ: -3.024489446241996
σ: 1.0
ρ: InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

)

winningrate(distx, disty) = 0.4999999999999999
median(distx) = 1.06736450376011
median(disty) = 0.9601137791206837
mean(distx) = 1.211578531257794
mean(disty) = 1.219803902718557
m = 100
n = 100

  0.706617 seconds (562.59 k allocations: 26.021 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.05035
ECDF(pval_mw, α) = 0.05535

Out[42]:

In [43]:

distx = inversegammadist(1, 1)
disty = inversegammadist(1, 10)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, m=50, n=100, shifttype=:median, L=10^5)

std(distx) = 1.0
std(disty) = 1.0
skewness(distx) = 1.0
skewness(disty) = 10.0
distx = InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

disty = InverseGamma{Float64}(
invd: Gamma{Float64}(α=3.487921561087423, θ=0.32951356487309685)
θ: 3.034776429872083
)

tiesh = tieshift(distx, disty) = -3.024489446241996
mediansh = median(disty) - median(distx) = -3.131740170881422
meansh = mean(disty) - mean(distx) = -3.016264074781233

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, InverseGamma{Float64}}(
μ: -3.131740170881422
σ: 1.0
ρ: InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

)

winningrate(distx, disty) = 0.5397140117551941
median(distx) = 0.9601137791206837
median(disty) = 0.9601137791206837
mean(distx) = 1.1043278066183677
mean(disty) = 1.219803902718557
m = 50
n = 100

  0.402107 seconds (586.08 k allocations: 32.829 MiB, 6.11% gc time)
α = 0.05
ECDF(pval_bm, α) = 0.11098
ECDF(pval_mw, α) = 0.1521

Out[43]:

In [44]:

distx = inversegammadist(1, 1)
disty = inversegammadist(1, 10)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, m=50, n=100, shifttype=:mean, L=10^5)

std(distx) = 1.0
std(disty) = 1.0
skewness(distx) = 1.0
skewness(disty) = 10.0
distx = InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

disty = InverseGamma{Float64}(
invd: Gamma{Float64}(α=3.487921561087423, θ=0.32951356487309685)
θ: 3.034776429872083
)

tiesh = tieshift(distx, disty) = -3.024489446241996
mediansh = median(disty) - median(distx) = -3.131740170881422
meansh = mean(disty) - mean(distx) = -3.016264074781233

shifttype = :mean
distx = distx + meansh = LocationScale{Float64, Continuous, InverseGamma{Float64}}(
μ: -3.016264074781233
σ: 1.0
ρ: InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

)

winningrate(distx, disty) = 0.4969294651265493
median(distx) = 1.075589875220873
median(disty) = 0.9601137791206837
mean(distx) = 1.219803902718557
mean(disty) = 1.219803902718557
m = 50
n = 100

  0.391127 seconds (562.90 k allocations: 26.109 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.05111
ECDF(pval_mw, α) = 0.07514

Out[44]:

In [45]:

distx = inversegammadist(1, 1)
disty = inversegammadist(1, 10)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, m=50, n=100, shifttype=:tie, L=10^5)

std(distx) = 1.0
std(disty) = 1.0
skewness(distx) = 1.0
skewness(disty) = 10.0
distx = InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

disty = InverseGamma{Float64}(
invd: Gamma{Float64}(α=3.487921561087423, θ=0.32951356487309685)
θ: 3.034776429872083
)

tiesh = tieshift(distx, disty) = -3.024489446241996
mediansh = median(disty) - median(distx) = -3.131740170881422
meansh = mean(disty) - mean(distx) = -3.016264074781233

shifttype = :tie
distx = distx + tiesh = LocationScale{Float64, Continuous, InverseGamma{Float64}}(
μ: -3.024489446241996
σ: 1.0
ρ: InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.012461179749810725)
θ: 80.24922359499622
)

)

winningrate(distx, disty) = 0.4999999999999999
median(distx) = 1.06736450376011
median(disty) = 0.9601137791206837
mean(distx) = 1.211578531257794
mean(disty) = 1.219803902718557
m = 50
n = 100

  0.372641 seconds (562.83 k allocations: 26.092 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.0509
ECDF(pval_mw, α) = 0.07421

Out[45]:

In [54]:

distx = inversegammadist(2, 1)
disty = inversegammadist(1, 10)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, m=50, n=100, shifttype=:median, L=10^5)

std(distx) = 2.0
std(disty) = 1.0
skewness(distx) = 1.0
skewness(disty) = 10.0
distx = InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.0062305898749053625)
θ: 160.49844718999245
)

disty = InverseGamma{Float64}(
invd: Gamma{Float64}(α=3.487921561087423, θ=0.32951356487309685)
θ: 3.034776429872083
)

tiesh = tieshift(distx, disty) = -7.056549533732756
mediansh = median(disty) - median(distx) = -7.2235941208835275
meansh = mean(disty) - mean(distx) = -7.252332052281023

shifttype = :median
distx = distx + mediansh = LocationScale{Float64, Continuous, InverseGamma{Float64}}(
μ: -7.2235941208835275
σ: 1.0
ρ: InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.0062305898749053625)
θ: 160.49844718999245
)

)

winningrate(distx, disty) = 0.5338369295451292
median(distx) = 0.9601137791206842
median(disty) = 0.9601137791206837
mean(distx) = 1.2485418341160521
mean(disty) = 1.219803902718557
m = 50
n = 100

  0.339683 seconds (576.95 k allocations: 30.182 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.08529
ECDF(pval_mw, α) = 0.16164

Out[54]:

In [55]:

distx = inversegammadist(2, 1)
disty = inversegammadist(1, 10)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, m=50, n=100, shifttype=:mean, L=10^5)

std(distx) = 2.0
std(disty) = 1.0
skewness(distx) = 1.0
skewness(disty) = 10.0
distx = InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.0062305898749053625)
θ: 160.49844718999245
)

disty = InverseGamma{Float64}(
invd: Gamma{Float64}(α=3.487921561087423, θ=0.32951356487309685)
θ: 3.034776429872083
)

tiesh = tieshift(distx, disty) = -7.056549533732756
mediansh = median(disty) - median(distx) = -7.2235941208835275
meansh = mean(disty) - mean(distx) = -7.252332052281023

shifttype = :mean
distx = distx + meansh = LocationScale{Float64, Continuous, InverseGamma{Float64}}(
μ: -7.252332052281023
σ: 1.0
ρ: InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.0062305898749053625)
θ: 160.49844718999245
)

)

winningrate(distx, disty) = 0.5395715441158672
median(distx) = 0.9313758477231886
median(disty) = 0.9601137791206837
mean(distx) = 1.2198039027185565
mean(disty) = 1.219803902718557
m = 50
n = 100

  0.336990 seconds (581.63 k allocations: 31.543 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.09575
ECDF(pval_mw, α) = 0.17917

Out[55]:

In [53]:

distx = inversegammadist(2, 1)
disty = inversegammadist(1, 10)
@show std(distx) std(disty)
@show skewness(distx) skewness(disty)
plot_sim_bm_mw(; distx, disty, m=50, n=100, shifttype=:tie, L=10^5)

std(distx) = 2.0
std(disty) = 1.0
skewness(distx) = 1.0
skewness(disty) = 10.0
distx = InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.0062305898749053625)
θ: 160.49844718999245
)

disty = InverseGamma{Float64}(
invd: Gamma{Float64}(α=3.487921561087423, θ=0.32951356487309685)
θ: 3.034776429872083
)

tiesh = tieshift(distx, disty) = -7.056549533732756
mediansh = median(disty) - median(distx) = -7.2235941208835275
meansh = mean(disty) - mean(distx) = -7.252332052281023

shifttype = :tie
distx = distx + tiesh = LocationScale{Float64, Continuous, InverseGamma{Float64}}(
μ: -7.056549533732756
σ: 1.0
ρ: InverseGamma{Float64}(
invd: Gamma{Float64}(α=19.94427190999916, θ=0.0062305898749053625)
θ: 160.49844718999245
)

)

winningrate(distx, disty) = 0.49999999999999956
median(distx) = 1.1271583662714555
median(disty) = 0.9601137791206837
mean(distx) = 1.4155864212668234
mean(disty) = 1.219803902718557
m = 50
n = 100

  0.406596 seconds (563.29 k allocations: 26.225 MiB)
α = 0.05
ECDF(pval_bm, α) = 0.05111
ECDF(pval_mw, α) = 0.10776

Out[53]:

In [ ]: