using Distributions
using StatsPlots
default(fmt=:png)
using Random
function biased_var_vs_unbiased_var(; dist = Normal(), n = 10, L = 1000,
loss_func = (x, v) -> (x - v)^2)
σ² = var(dist)
Loss_biased = Vector{Float64}(undef, L)
Loss_unbiased = Vector{Float64}(undef, L)
tmp = [Vector{Float64}(undef, n) for _ in 1:Threads.nthreads()]
Threads.@threads for i in 1:L
sample = rand!(dist, tmp[Threads.threadid()])
b = var(sample; corrected=false) # biased var
u = var(sample; corrected=true) # unbiased var
Loss_biased[i] = loss_func(b, σ²)
Loss_unbiased[i] = loss_func(u, σ²)
end
Loss_biased, Loss_unbiased
end
function plot_score(; dist = Normal(), n = 10, L = 10^4,
loss_func = (x, v) -> (x - v)^2,
bin = 100)
Loss_biased, Loss_unbiased = biased_var_vs_unbiased_var(; dist, n, L, loss_func)
Score = Loss_unbiased - Loss_biased
@show mean(Score) std(Score)
P1 = plot(cumsum(Score); label="total score", legend=:outertop)
P2 = histogram(Score; alpha=0.3, label="score", legend=:outertop, bin)
vline!([0]; label="", c=:red)
plot(P1, P2; size=(800, 300))
end
Random.seed!(4649373)
TaskLocalRNG()
plot_score(; loss_func = (x, v) -> abs(x - v), L = 100, bin = 20)
mean(Score) = 0.01371993776713453 std(Score) = 0.10633855282733227
plot_score(; loss_func = (x, v) -> abs(x - v))
mean(Score) = 0.014139002992519186 std(Score) = 0.10590131822797655
plot_score(; loss_func = (x, v) -> (x - v)^2, L = 100, bin = 20)
mean(Score) = 0.025613678006661256 std(Score) = 0.11429213364894529
plot_score(; loss_func = (x, v) -> (x - v)^2)
mean(Score) = 0.033755058830330836 std(Score) = 0.13854657499295311
@time plot_score(; loss_func = (x, v) -> (x - v)^2, L = 10^8)
mean(Score) = 0.03221120620692022 std(Score) = 0.14107000797138233 10.306188 seconds (579.51 k allocations: 5.247 GiB, 4.81% gc time, 1.50% compilation time)
plot_score(; loss_func = (x, v) -> abs(√x - √v))
mean(Score) = -0.0014703013540671168 std(Score) = 0.04965878924206156
plot_score(; loss_func = (x, v) -> (√x - √v)^2)
mean(Score) = 5.3791564274421e-6 std(Score) = 0.023816578267568937
plot_score(; loss_func = (x, v) -> abs(log(x) - log(v)))
mean(Score) = -0.022433692862829032 std(Score) = 0.09993008999248255
plot_score(; loss_func = (x, v) -> (log(x) - log(v))^2)
mean(Score) = -0.03601073446799377 std(Score) = 0.10464237365524166
plot_score(; loss_func = (x, v) -> abs(log(x) - log(v)), n = 100)
mean(Score) = -0.0006392480825660843 std(Score) = 0.009937366777722762
plot_score(; loss_func = (x, v) -> (log(x) - log(v))^2, n = 100)
mean(Score) = -0.0003248747436658944 std(Score) = 0.0028771044691938575