"""
h_brunner_munzel(x, y)
この函数は, x < y のとき 1.0 を, x = y のとき 0.5 を, それら以外のとき 0.0 返す.
"""
h_brunner_munzel(x, y) = (x < y) + (x == y)/2
@doc raw"""
phat_brunner_munzel(X, Y)
まず以下のようにおく:
```math
\begin{aligned}
&
H(x, y) = \begin{cases} 1 & (x < y) \\ 1/2 & (x = y) \\ 0 & (x > y), \end{cases}
\\ &
m = \mathrm{length}(X), \quad
n = \mathrm{length}(Y), \quad
x_i = X[i], \quad
y_j = Y[j]
\end{aligned}
```
この函数は次の $\hat{p}$ を返す:
```math
\hat{p} = \frac{1}{mn}\sum_{i=1}^m \sum_{j=1}^n H(x_i, y_j).
```
"""
phat_brunner_munzel(X, Y) = mean(h_brunner_munzel(x, y) for x in X, y in Y)
@doc raw"""
statistics_brunner_munzel(X, Y,
Hx = similar(X, Float64),
Hy = similar(Y, Float64);
p = 1/2
)
この函数はデータ `X`, `Y` について, Brunner-Munzel検定関係の統計量達を計算する. 詳細は以下の通り.
函数 $H(x, y)$ と $\hat{p}$, $H^x_i$, $H^y_j$, $\bar{H}^x$, $\bar{H}^y$ を次のように定める:
```math
\begin{aligned}
&
m = \mathrm{length}(X), \quad
n = \mathrm{length}(Y), \quad
x_i = X[i], \quad
y_j = Y[j],
\\ &
\hat{p} = \frac{1}{mn}\sum_{i=1}^m \sum_{j=1}^n H(x_i, y_j),
\\ &
H(x, y) = \begin{cases} 1 & (x < y) \\ 1/2 & (x = y) \\ 0 & (x > y), \end{cases}
\\ &
H^x_i = \sum_{j=1}^n H(y_j, x_i), \quad
H^y_j = \sum_{i=1}^m H(x_i, y_j),
\\ &
\bar{H}^x = \frac{1}{m} \sum_{i=1}^m H^x_i = n - n\hat{p},
\\ &
\bar{H}^y = \frac{1}{n} \sum_{j=1}^n H^y_j = m\hat{p}.
\end{aligned}
```
この函数は以下達の named tuple で返す:
```math
\begin{aligned}
&
\mathrm{phat} =
\hat{p} = \frac{\bar{H}^y - \bar{H}^x + n}{m + n},
\\ &
\mathrm{sx2} =
\hat{\sigma}_x^2 = \frac{1}{n^2}\frac{1}{m-1}\sum_{i=1}^m (H^x_i - \bar{H}^x)^2,
\\ &
\mathrm{sy2} =
\hat{\sigma}_y^2 = \frac{1}{m^2}\frac{1}{n-1}\sum_{j=1}^n (H^y_j - \bar{H}^y)^2,
\\ &
\mathrm{sehat} =
\widehat{\mathrm{se}} = \sqrt{\frac{\hat{\sigma}_x^2}{m} + \frac{\hat{\sigma}_y^2}{n}},
\\ &
\mathrm{tvalue} = t = \frac{\hat{p} - p}{\widehat{\mathrm{se}}},
\\ &
\mathrm{df} =
\nu =
\frac
{\left(\hat{\sigma}_x^2/m + \hat{\sigma}_y^2/n\right)^2}
{
\dfrac{\left(\hat{\sigma}_x^2/m\right)^2}{m-1} +
\dfrac{\left(\hat{\sigma}_y^2/n\right)^2}{n-1}
},
\\ &
\mathrm{pvalue} =
2\mathrm{ccdf}(\mathrm{TDist}(\nu), |t|).
\end{aligned}
```
"""
function statistics_brunner_munzel(X, Y,
Hx = similar(X, Float64),
Hy = similar(Y, Float64);
p = 1/2
)
m, n = length(X), length(Y)
for (i, x) in pairs(X)
Hx[i] = sum(h_brunner_munzel(y, x) for y in Y)
end
for (j, y) in pairs(Y)
Hy[j] = sum(h_brunner_munzel(x, y) for x in X)
end
phat = (mean(Hy) - mean(Hx) + n)/(m + n)
sx2, sy2 = var(Hx)/n^2, var(Hy)/m^2
sehat = √(sx2/m + sy2/n)
tvalue = (phat - p)/sehat
df = safediv((sx2/m + sy2/n)^2, (sx2/m)^2/(m-1) + (sy2/n)^2/(n-1))
pvalue = (df != 0 && isfinite(df)) ? 2ccdf(TDist(df), abs(tvalue)) : zero(df)
(; phat, sx2, sy2, sehat, tvalue, df, pvalue)
end
@doc raw"""
pvalue_brunner_munzel(X, Y,
Hx = similar(X, Float64),
Hy = similar(Y, Float64);
p = 1/2
)
この函数はBrunner-Munzel検定のP値 `pvalue` を返す.
"""
function pvalue_brunner_munzel(X, Y,
Hx = similar(X, Float64),
Hy = similar(Y, Float64);
p = 1/2
)
statistics_brunner_munzel(X, Y, Hx, Hy; p).pvalue
end
"""
tieshift(X::AbstractVector, Y::AbstractVector; p = 1/2)
この函数は `phat_brunner_munzel(X, Y .+ a)` の値が `p` に等しくなる `a` を返す.
"""
function tieshift(X::AbstractVector, Y::AbstractVector; p = 1/2)
shiftmin = minimum(X) - maximum(Y) - 0.1
shiftmax = maximum(X) - minimum(Y) + 0.1
find_zero(a -> phat_brunner_munzel(X, Y .+ a) - p, (shiftmin, shiftmax))
end
@doc raw"""
brunner_munzel(X, Y,
Hx = similar(X, Float64),
Hy = similar(Y, Float64),
Ytmp = similar(Y, Float64);
p = 1/2,
α = 0.05,
maxsplit = 30
)
この函数はBrunner-Munzel検定を実行する. 詳細は以下の通り.
この函数は `phat`, `sehat`, `tvalue`, `df`, `p`, `pvalue`, `α` および\
以下達の named tuple を返す.
```math
\begin{aligned}
&
\mathrm{confint\_p} = (\text{$p$ の信頼度 $1-\alpha$ の信頼区間}),
\\ &
\mathrm{confint\_shift} = (\text{2つの集団が互角になるようなシフトの信頼度 $1-\alpha$ の信頼区間}),
\\ &
\mathrm{pvalue\_shift} = ($\mathrm{confint\_shift}$ の計算で使われたP値函数),
\\ &
\mathrm{shifthat} = (\text{2つの集団が互角になるようなシフトの点推定値}).
\end{aligned}
```
さらに, $\mathrm{shiftmin}$, $\mathrm{shiftmax}$ はデータから推定されるシフトの下限と上限.
"""
function brunner_munzel(X, Y,
Hx = similar(X, Float64),
Hy = similar(Y, Float64),
Ytmp = similar(Y, Float64);
p = 1/2,
α = 0.05,
maxsplit = 30
)
(; phat, sehat, tvalue, df, pvalue) = statistics_brunner_munzel(X, Y, Hx, Hy; p)
c = df == 0 ? Inf : quantile(TDist(df), 1 - α/2)
confint_p = [max(0, phat - c*sehat), min(1, phat + c*sehat)]
function pvalue_shift(a)
@. Ytmp = Y + a
pvalue_brunner_munzel(X, Ytmp, Hx, Hy; p)
end
shiftmin = minimum(X) - maximum(Y) - 0.1
shiftmax = maximum(X) - minimum(Y) + 0.1
shifthat = tieshift(X, Y; p)
confint_shift = [
find_zero(a -> pvalue_shift(a) - α, (shiftmin, shifthat))
find_zero(a -> pvalue_shift(a) - α, (shifthat, shiftmax))
]
(; phat, sehat, tvalue, df, p, pvalue, α, confint_p,
confint_shift, pvalue_shift, shifthat, shiftmin, shiftmax)
end
function show_plot_brunner_munzel(X, Y,
Hx = similar(X, Float64),
Hy = similar(Y, Float64),
Ytmp = similar(Y, Float64);
p = 1/2,
α = 0.05,
showXY = false,
kwargs...
)
showXY && (@show X Y)
(; phat, sehat, tvalue, df, p, pvalue, α, confint_p,
confint_shift, pvalue_shift, shifthat, shiftmin, shiftmax) =
brunner_munzel(X, Y, Hx, Hy, Ytmp; p, α)
pprint((; phat, sehat, tvalue, df, p, pvalue, α, confint_p,
confint_shift, shifthat))
println()
@show median(X) median(Y)
plot(pvalue_shift, shiftmin, shiftmax; label="")
vline!([tieshift(X, Y)]; label="", ls=:dash)
title!("P-value function of shift")
plot!(ytick=0:0.05:1)
plot!(; kwargs...)
end