# Demo script: empirical statistics (mean/variance/covariance/correlation),
# their linear-algebra formulations, and a PCA-style SVD visualization.
# NOTE(review): this file had been collapsed onto a single line, which broke
# parsing (each `#` comment swallowed the rest of the line); line structure
# restored below.

using PyPlot, Statistics, LinearAlgebra

x = randn(100) # 100 gaussian random numbers
plot(x, x * 0, "r.")
grid()

# A larger sample: histogram, sample mean, and sample variance.
x = randn(10000)
hist(x, bins=100);
mean(x)
sum((x.-mean(x)).^2)/(length(x)-1)  # unbiased sample variance, by hand
var(x)                               # same thing via Statistics.var

# With 10^7 samples the estimates approach the true mean 0 and variance 1.
xbig = randn(10^7)
mean(xbig), var(xbig)

# Linear-algebra view: the mean is a least-squares projection onto ones(n).
o = ones(length(x))
μ = o'x / o'o
mean(x) # same answer

# Variance = squared norm of the projection residual, divided by n - 1.
norm((I - o*o'/o'o)*x)^2 / (o'o - 1)
var(x) # same answer
norm(x - o*(o'x)/o'o)^2 / (o'o - 1) # same answer, much faster

# Two noisy sinusoids (correlated with each other) and pure noise.
θ = range(0,4π,length=100)
x = @. sin(θ + randn()*0.1) * (1 + 0.3*randn())
y = @. sin(θ + randn()*0.1) * (1 + 0.3*randn())
z = randn(100)
plot(x, "b.-")
plot(y, "r.-")
plot(z, "g.-")
legend(["x = sin with noise","y = sin with noise","z = noise"])
mean(x),mean(y),mean(z)

# A simple covariance function. See https://github.com/JuliaStats/StatsBase.jl for
# better statistical functions in Julia.
covar(x,y) = dot(x .- mean(x), y .- mean(y)) / (length(x) - 1)

covar(x,y), covar(x,z)
covar(x,y) / sqrt(covar(x,x) * covar(y,y)) # correlation, manually computed
cor(x,y)                                   # same via Statistics.cor
cor(x,z)
abs(cor(x,y)/cor(x,z))

# Side-by-side scatter plots: correlated pair vs. uncorrelated pair.
subplot(1,2,1)
plot(x, y, "r*")
axis("square")
title("y vs. x: correlated")
xlabel(L"x")
ylabel(L"y")
xlim(-2, 2)
subplot(1,2,2)
plot(x, z, "b*")
axis("square")
title("z vs. x: uncorrelated")
xlabel(L"x")
ylabel(L"z")
xlim(-2, 2)

plot(x,y, ".")
xlabel("x")
ylabel("y")
axhline(0, linestyle="--", color="k", alpha=0.2)
axvline(0, linestyle="--", color="k", alpha=0.2)

# Sample covariance matrix from the mean-centered data matrix A,
# and the same quantity via the SVD: S = V * Diagonal(σ.^2) * V'.
A = [x.-mean(x) y.-mean(y)] # columns are x and y with means subtracted
S = A' * A / (length(x)-1)
U, σ, V = svd(A / sqrt(length(x)-1))
σ.^2   # eigenvalues of S (principal variances)
V      # principal directions

# Plot the centered data with the two principal directions v₁, v₂ overlaid.
plot(x.-mean(x),y.-mean(y), ".")
xlabel("x")
ylabel("y")
axhline(0, linestyle="--", color="k", alpha=0.2)
axvline(0, linestyle="--", color="k", alpha=0.2)
arrow(0,0, V[:,1]..., head_width=0.1, color="r")
arrow(0,0, V[:,2]..., head_width=0.1, color="r")
text(V[1,1]+0.1,V[2,1]-0.1, "v₁", color="r")
text(V[1,2]+0.1,V[2,2], "v₂", color="r")
axis("equal")