require 'statsample'
Statsample::Analysis.store("Statsample::Bivariate.correlation_matrix") do
  # Daru::Vector and Daru::DataFrame normally refresh metadata (such as the
  # positions of missing values) every time they are created.
  #
  # The data generated below contains no missing values, so we set
  # Daru.lazy_update = true to skip that bookkeeping and make things faster.
  #
  # If you do have missing data while lazy_update is *true*, you _should_ call
  # `#update` on the affected Vector or DataFrame object every time an
  # assignment or deletion cycle is complete.
  Daru.lazy_update = true

  begin
    # Build a Daru::DataFrame containing 4 vectors: a, b, c and d.
    #
    # The `clone` option is set to *false*. This tells Daru not to clone the
    # Daru::Vectors supplied by `rnorm`, since copying them would be wasted
    # work once they have been assigned to the dataframe.
    samples = 1000
    ds = Daru::DataFrame.new({
      a: rnorm(samples),
      b: rnorm(samples),
      c: rnorm(samples),
      d: rnorm(samples)
    }, clone: false)

    puts "== DataFrame ==\n"
    IRuby.display ds.head

    # Compute the correlation matrix of the dataframe's vectors via
    # Statsample::Bivariate.correlation_matrix.
    cm = Statsample::Bivariate.correlation_matrix(ds)

    puts "\n== Correlation Matrix ==\n"
    IRuby.display cm
  ensure
    # Always switch lazy_update back to *false*, even if something above
    # raised, so that this analysis cannot accidentally affect code elsewhere.
    Daru.lazy_update = false
  end
end

# Run every analysis registered with Statsample::Analysis.store.
Statsample::Analysis.run_batch
"== DataFrame ==\n"
 | a | b | c | d |
---|---|---|---|---|
0 | -2.440821265161855 | -0.2663867705446803 | -0.9667049163587536 | -0.6247662665284197 |
1 | -0.9973263526986711 | 0.8965870460177997 | -0.4924093636219239 | -1.0325364294957489 |
2 | -0.2744289410160191 | 1.8088687531883922 | -0.5910605176882341 | 1.3789993200304744 |
3 | 1.8688690135894495 | -0.5845393024341371 | 2.140052050767279 | -0.11879447826728619 |
4 | 1.0841575504132723 | 0.29819461734869424 | 0.5801339485682966 | 1.935529360750203 |
5 | 0.8365312603239075 | -2.592250495057871 | -0.5377005060865632 | -1.3156474227148434 |
6 | -0.6143096811863075 | 0.9303368054803663 | -0.2762842748435172 | 0.695374402222615 |
7 | 0.9288749463298932 | 0.17342875610525094 | -0.18948493978562986 | 0.5439278783192012 |
8 | -1.2315590581627647 | -0.5070225404828463 | 0.7560851316374544 | -0.5377282599496545 |
9 | -0.8552039210385985 | 0.1061517520214962 | -1.2859991357392173 | -0.7772438525282614 |
10 | 0.109004152753796 | -0.5146703134593364 | 1.6338786244674606 | -0.39285194944821017 |
"\n== Correlation Matrix ==\n"
Analysis 2015-06-03 15:17:33 +0530 = Statsample::Bivariate.correlation_matrix