# Demo script: empirical statistics (mean/variance/covariance/correlation),
# their linear-algebra formulations, and a PCA-style SVD visualization.
# NOTE(review): this file had been collapsed onto a single line, which broke
# parsing (each `#` comment swallowed the rest of the line); line structure
# restored below.

using PyPlot, Statistics, LinearAlgebra

x = randn(100) # 100 gaussian random numbers
plot(x, x * 0, "r.")
grid()

# A larger sample: histogram, sample mean, and sample variance.
x = randn(10000)
hist(x, bins=100);
mean(x)
sum((x.-mean(x)).^2)/(length(x)-1)  # unbiased sample variance, by hand
var(x)                               # same thing via Statistics.var

# With 10^7 samples the estimates approach the true mean 0 and variance 1.
xbig = randn(10^7)
mean(xbig), var(xbig)

# Linear-algebra view: the mean is a least-squares projection onto ones(n).
o = ones(length(x))
μ = o'x / o'o
mean(x) # same answer

# Variance = squared norm of the projection residual, divided by n - 1.
norm((I - o*o'/o'o)*x)^2 / (o'o - 1)
var(x) # same answer
norm(x - o*(o'x)/o'o)^2 / (o'o - 1) # same answer, much faster

# Two noisy sinusoids (correlated with each other) and pure noise.
θ = range(0,4π,length=100)
x = @. sin(θ + randn()*0.1) * (1 + 0.3*randn())
y = @. sin(θ + randn()*0.1) * (1 + 0.3*randn())
z = randn(100)
plot(x, "b.-")
plot(y, "r.-")
plot(z, "g.-")
legend(["x = sin with noise","y = sin with noise","z = noise"])
mean(x),mean(y),mean(z)

# A simple covariance function. See https://github.com/JuliaStats/StatsBase.jl for
# better statistical functions in Julia.
covar(x,y) = dot(x .- mean(x), y .- mean(y)) / (length(x) - 1)

covar(x,y), covar(x,z)
covar(x,y) / sqrt(covar(x,x) * covar(y,y)) # correlation, manually computed
cor(x,y)                                   # same via Statistics.cor
cor(x,z)
abs(cor(x,y)/cor(x,z))

# Side-by-side scatter plots: correlated pair vs. uncorrelated pair.
subplot(1,2,1)
plot(x, y, "r*")
axis("square")
title("y vs. x: correlated")
xlabel(L"x")
ylabel(L"y")
xlim(-2, 2)
subplot(1,2,2)
plot(x, z, "b*")
axis("square")
title("z vs. x: uncorrelated")
xlabel(L"x")
ylabel(L"z")
xlim(-2, 2)

plot(x,y, ".")
xlabel("x")
ylabel("y")
axhline(0, linestyle="--", color="k", alpha=0.2)
axvline(0, linestyle="--", color="k", alpha=0.2)

# Sample covariance matrix from the mean-centered data matrix A,
# and the same quantity via the SVD: S = V * Diagonal(σ.^2) * V'.
A = [x.-mean(x) y.-mean(y)] # columns are x and y with means subtracted
S = A' * A / (length(x)-1)
U, σ, V = svd(A / sqrt(length(x)-1))
σ.^2   # eigenvalues of S (principal variances)
V      # principal directions

# Plot the centered data with the two principal directions v₁, v₂ overlaid.
plot(x.-mean(x),y.-mean(y), ".")
xlabel("x")
ylabel("y")
axhline(0, linestyle="--", color="k", alpha=0.2)
axvline(0, linestyle="--", color="k", alpha=0.2)
arrow(0,0, V[:,1]..., head_width=0.1, color="r")
arrow(0,0, V[:,2]..., head_width=0.1, color="r")
text(V[1,1]+0.1,V[2,1]-0.1, "v₁", color="r")
text(V[1,2]+0.1,V[2,2], "v₂", color="r")
axis("equal")