using DataFrames
using Distributions
using LinearAlgebra
dot2(x) = dot(x, x)
using Plots
default(fmt=:png)
using RCall
using RDatasets
data = [
0.67 0.83
0.66 1.34
0.68 1.27
0.68 1.53
0.71 1.72
0.71 1.54
0.71 1.39
0.72 1.23
0.73 1.58
0.7325 1.58
0.74 1.60
0.755 1.55
0.755 1.63
0.76 1.65
0.765 1.5
0.77 1.56
0.77 1.4
0.772 1.44
0.773 1.74
0.78 1.79
0.7825 1.71
0.7875 1.36
0.7975 1.45
0.795 1.52
0.805 1.69
0.82 1.65
0.835 1.605
0.8475 1.48
0.865 1.36
0.89 1.72
]
@show size(data)
X, Y = eachcol(data)
df = DataFrame(X = X, Y = Y);
size(data) = (30, 2)
function plot_ols(X, Y;
xlim=(0.6, 1.0), ylim=(0.6, 2.0),
xtick=0.6:0.1:1.0, ytick=0.6:0.2:2.0,
size=(1000, 450),
α = 0.05,
xguide = "gender gap index",
yguide = "total fertility rate",
plotci = true,
legend = :bottomright
)
n = length(X)
A = [ones(n) X]
β̂ = A \ Y
Ŷ = A * β̂
s = √(dot2(Y - Ŷ)/(n - 2))
c = quantile(TDist(n - 2), 1 - α/2)
f(x) = evalpoly(x, β̂)
g(a, b) = √([a, b]' * (A'A \ [a, b]))
R = cor(X, Y)
pval_β₁(β₁) = 2ccdf(TDist(n - 2), abs(β̂[2] - β₁)/(s*g(0, 1)))
pval = pval_β₁(0)
ci = [β̂[2] - c*s*g(0, 1), β̂[2] + c*s*g(0, 1)]
ci′ = [β̂[2] - c*abs(β̂[2])/√(n-2)*√((1 - R^2)/R^2), β̂[2] + c*abs(β̂[2])/√(n-2)*√((1 - R^2)/R^2)]
@show pval
@show ci
@show ci′
println()
println("y = $(round(β̂[2], digits=2)) x + $(round(β̂[1], digits=2))")
println("R² = ", round(R^2; digits=2))
println("P-value of \"β₁ = 0\" = ", round(pval; digits=4))
println("$(100(1-α))% CI of β₁ = $(round.(ci; digits=4))")
P = scatter(X, Y; label="", xlim, ylim, xtick, ytick)
plot!(f; label="regression line", c=2)
plotci && plot!(x -> f(x) + c*s*g(1, x); label="$(100(1-α))% CI", ls=:dot, c=3)
plotci && plot!(x -> f(x) - c*s*g(1, x); label="", c=3)
plotci && plot!(x -> ci[begin]*(x - mean(X)) + mean(Y); label="max β₁", ls=:dash, c=4)
plotci && plot!(x -> ci[end]*(x - mean(X)) + mean(Y); label="min β₁", ls=:dashdot, c=5)
plot!(; xguide, yguide, legend)
Q = plot(pval_β₁, β̂[2] - 4s*g(0, 1), β̂[2] + 4s*g(0, 1); label="")
vline!([β̂[2]]; label="point estimate of β₁", ls=:dash, c=2)
vline!([0.0]; label="", c=:black, lw=0.5)
plotci && plot!(ci, fill(α, 2); label="$(100(1-α))% CI", lw=2, c=3)
plot!(ytick=0:0.05:1, xguide="β₁", yguide="P-value")
plot(P, Q; size)
plot!(leftmargin=4Plots.mm, bottommargin=4Plots.mm)
end
plot_ols (generic function with 1 method)
plot_ols(X, Y)
pval = 0.022663207605639057 ci = [0.2129756784822372, 2.6121676242433525] ci′ = [0.21297567848225407, 2.6121676242433356] y = 1.41 x + 0.44 R² = 0.17 P-value of "β₁ = 0" = 0.0227 95.0% CI of β₁ = [0.213, 2.6122]
plot_ols(X, Y; α = 0.02)
pval = 0.022663207605639057 ci = [-0.03224430620124985, 2.8573876089268397] ci′ = [-0.032244306201229866, 2.8573876089268193] y = 1.41 x + 0.44 R² = 0.17 P-value of "β₁ = 0" = 0.0227 98.0% CI of β₁ = [-0.0322, 2.8574]
@rput df
R"""
library(ggplot2)
ggplot(df, aes(x = X, y = Y)) +
geom_point() +
stat_smooth(method = lm)
"""
RObject{VecSxp} `geom_smooth()` using formula = 'y ~ x'
@rput df
R"""
library(ggplot2)
ggplot(df, aes(x = X, y = Y)) +
geom_point() +
geom_smooth(method = lm, level=0.98)
"""
RObject{VecSxp} `geom_smooth()` using formula = 'y ~ x'
plot_ols(X[begin+1:end], Y[begin+1:end])
pval = 0.1184249372779967 ci = [-0.2150076457114457, 1.7945213231275523] ci′ = [-0.21500764571145103, 1.7945213231275576] y = 0.79 x + 0.93 R² = 0.09 P-value of "β₁ = 0" = 0.1184 95.0% CI of β₁ = [-0.215, 1.7945]
df1 = DataFrame(X = X[begin+1:end], Y = Y[begin+1:end])
@rput df1
R"""
library(ggplot2)
ggplot(df1, aes(x = X, y = Y)) +
geom_point() +
geom_smooth(method = lm)
"""
RObject{VecSxp} `geom_smooth()` using formula = 'y ~ x'
plot_ols(X[begin+1:end-1], Y[begin+1:end-1])
pval = 0.2609375462470625 ci = [-0.49821261116664106, 1.7616409123606291] ci′ = [-0.4982126111666486, 1.7616409123606367] y = 0.63 x + 1.05 R² = 0.05 P-value of "β₁ = 0" = 0.2609 95.0% CI of β₁ = [-0.4982, 1.7616]
df2 = DataFrame(X = X[begin+1:end-1], Y = Y[begin+1:end-1])
@rput df2
R"""
library(ggplot2)
ggplot(df2, aes(x = X, y = Y)) +
geom_point() +
geom_smooth(method = lm)
"""
RObject{VecSxp} `geom_smooth()` using formula = 'y ~ x'
plot_ols(X, Y; plotci=false)
pval = 0.022663207605639057 ci = [0.2129756784822372, 2.6121676242433525] ci′ = [0.21297567848225407, 2.6121676242433356] y = 1.41 x + 0.44 R² = 0.17 P-value of "β₁ = 0" = 0.0227 95.0% CI of β₁ = [0.213, 2.6122]
plot_ols(X[begin+1:end], Y[begin+1:end]; plotci=false)
pval = 0.1184249372779967 ci = [-0.2150076457114457, 1.7945213231275523] ci′ = [-0.21500764571145103, 1.7945213231275576] y = 0.79 x + 0.93 R² = 0.09 P-value of "β₁ = 0" = 0.1184 95.0% CI of β₁ = [-0.215, 1.7945]
anscombe = dataset("datasets", "anscombe")
Row | X1 | X2 | X3 | X4 | Y1 | Y2 | Y3 | Y4 |
---|---|---|---|---|---|---|---|---|
Int64 | Int64 | Int64 | Int64 | Float64 | Float64 | Float64 | Float64 | |
1 | 10 | 10 | 10 | 8 | 8.04 | 9.14 | 7.46 | 6.58 |
2 | 8 | 8 | 8 | 8 | 6.95 | 8.14 | 6.77 | 5.76 |
3 | 13 | 13 | 13 | 8 | 7.58 | 8.74 | 12.74 | 7.71 |
4 | 9 | 9 | 9 | 8 | 8.81 | 8.77 | 7.11 | 8.84 |
5 | 11 | 11 | 11 | 8 | 8.33 | 9.26 | 7.81 | 8.47 |
6 | 14 | 14 | 14 | 8 | 9.96 | 8.1 | 8.84 | 7.04 |
7 | 6 | 6 | 6 | 8 | 7.24 | 6.13 | 6.08 | 5.25 |
8 | 4 | 4 | 4 | 19 | 4.26 | 3.1 | 5.39 | 12.5 |
9 | 12 | 12 | 12 | 8 | 10.84 | 9.13 | 8.15 | 5.56 |
10 | 7 | 7 | 7 | 8 | 4.82 | 7.26 | 6.42 | 7.91 |
11 | 5 | 5 | 5 | 8 | 5.68 | 4.74 | 5.73 | 6.89 |
xlim = (2, 20)
ylim = (2, 14)
xtick = 2:2:20
ytick = 2:14;
@show fit_mle(MvNormal, [anscombe.X1 anscombe.Y1]')
plot_ols(anscombe.X1, anscombe.Y1; xlim, ylim, xtick, ytick)
plot!(plot_title="Anscombe 1")
fit_mle(MvNormal, ([anscombe.X1 anscombe.Y1])') = FullNormal( dim: 2 μ: [9.0, 7.500909090909093] Σ: [10.0 5.000909090909092; 5.000909090909092 3.752062809917356] ) pval = 0.0021696288730787953 ci = [0.23337013638518755, 0.7668116817966307] ci′ = [0.2333701363851875, 0.7668116817966308] y = 0.5 x + 3.0 R² = 0.67 P-value of "β₁ = 0" = 0.0022 95.0% CI of β₁ = [0.2334, 0.7668]
@show fit_mle(MvNormal, [anscombe.X2 anscombe.Y2]')
plot_ols(anscombe.X2, anscombe.Y2; xlim, ylim, xtick, ytick)
plot!(plot_title="Anscombe 2")
fit_mle(MvNormal, ([anscombe.X2 anscombe.Y2])') = FullNormal( dim: 2 μ: [9.0, 7.500909090909091] Σ: [10.0 5.0; 5.0 3.7523900826446277] ) pval = 0.0021788162369107984 ci = [0.2331474671087937, 0.7668525328912066] ci′ = [0.2331474671087938, 0.7668525328912064] y = 0.5 x + 3.0 R² = 0.67 P-value of "β₁ = 0" = 0.0022 95.0% CI of β₁ = [0.2331, 0.7669]
@show fit_mle(MvNormal, [anscombe.X3 anscombe.Y3]')
plot_ols(anscombe.X3, anscombe.Y3; xlim, ylim, xtick, ytick)
plot!(plot_title="Anscombe 3")
fit_mle(MvNormal, ([anscombe.X3 anscombe.Y3])') = FullNormal( dim: 2 μ: [9.0, 7.500000000000001] Σ: [10.0 4.997272727272727; 4.997272727272727 3.747836363636364] ) pval = 0.0021763052792280174 ci = [0.2330694748001253, 0.7663850706544203] ci′ = [0.2330694748001252, 0.7663850706544204] y = 0.5 x + 3.0 R² = 0.67 P-value of "β₁ = 0" = 0.0022 95.0% CI of β₁ = [0.2331, 0.7664]
@show fit_mle(MvNormal, [anscombe.X4 anscombe.Y4]')
plot_ols(anscombe.X4, anscombe.Y4; xlim, ylim, xtick, ytick)
plot!(plot_title="Anscombe 4")
fit_mle(MvNormal, ([anscombe.X4 anscombe.Y4])') = FullNormal( dim: 2 μ: [9.0, 7.50090909090909] Σ: [10.0 4.999090909090909; 4.999090909090909 3.74840826446281] ) pval = 0.002164602347197216 ci = [0.23338412796197872, 0.7664340538562033] ci′ = [0.23338412796197855, 0.7664340538562036] y = 0.5 x + 3.0 R² = 0.67 P-value of "β₁ = 0" = 0.0022 95.0% CI of β₁ = [0.2334, 0.7664]