using Requests longleyurl = get("http://hua-zhou.github.io/teaching/biostatm280-2018spring/slides/13-cond/longley.txt") longley = readdlm(IOBuffer(longleyurl.data)) using StatPlots gr() corrplot(longley, label = ["Employ" "Price" "GNP" "Jobless" "Military" "PopSize" "Year"]) # response: Employment y = longley[:, 1] # predictor matrix X = [ones(length(y)) longley[:, 2:end]] # Julia function for obtaining condition number cond(X) # what's the implementation? @which cond(X) # we see the smallest singular value (aka trouble number) is very small xsvals = svdvals(X) # condition number of the design matrix xcond = maximum(xsvals) / minimum(xsvals) # X is full rank from SVD xrksvd = rank(X) @which rank(X) # least squares from QR X \ y # Gram matrix G = X'X # rank of Gram matrix from SVD # rank deficient! rank(G) svdvals(G) # rank of Gram matrix from cholesky # full! gchol = cholfact(G, :L, Val{true}) rank(gchol) # least squares solution from Cholesky matches those from QR gchol \ (X'y) Xsp = convert(Matrix{Float32}, X) ysp = convert(Vector{Float32}, y) # least squares by QR: dramatically different from Float64 solution Xsp \ ysp # least squares by Cholesky: failed G = Xsp'Xsp gchol = cholfact(G, :L, Val{true}) gchol \ (Xsp'ysp) rank(Xsp) # rank of Gram matrix by Cholesky rank(gchol) # rank of Gram matrix by SVD rank(G) using StatsBase # center and standardize matrix along dimension 1 Xcs = zscore(X[:, 2:end], 1) Xcs = [ones(length(y)) Xcs] cond(Xcs) rank(Xcs) rank(Xcs'Xcs) versioninfo()