# Baby-name analysis script.
#
# Reads the yearly files data/yob1951.txt .. data/yob2014.txt into a single
# Daru data frame, plots total births by sex per year, and then plots the
# yearly births recorded for a handful of individual names.
#
# Several bare expressions below (`data_frame`, `pivoted`, `nil`, and the
# plot objects) have no effect when run as a plain script.
# NOTE(review): they look like display cells from an interactive session
# (e.g. a notebook); they are kept so that usage keeps working - confirm.

require 'csv'  # CSV is referenced directly below
require 'date' # DateTime is referenced directly below
require 'daru'
require 'gnuplotrb'

# Quick look at the first rows of the first input file.
CSV.read("data/yob1951.txt").first(10)

# Loads one year's file and tags every row with that year.
load_year = lambda do |year|
  frame = Daru::DataFrame.from_csv("data/yob#{year}.txt", headers: ['name', 'sex', 'births'])
  frame['year'] = [year] * frame.size
  frame
end

# Start with 1951 and append each following year in order.
data_frame = load_year.call(1951)
(1952..2014).each do |year|
  data_frame = data_frame.concat(load_year.call(year))
end

# Relabel the columns of the combined frame.
# NOTE(review): this assigns labels positionally; presumably the combined
# frame's column order is births, name, sex, year at this point - confirm.
data_frame.vectors = Daru::Index.new(['births', 'name', 'sex', 'year'])
data_frame

# Sum of births for every (year, sex) combination.
pivoted = data_frame.pivot_table(
  index: ['year'],
  vectors: ['sex'],
  agg: :sum,
  values: 'births'
)

# Replace the year index with a yearly date-time index starting at 1951 and
# give the two pivot columns plain labels.
pivoted.index = Daru::DateTimeIndex.date_range(start: '1951', periods: pivoted.size, freq: 'YEAR')
pivoted.vectors = Daru::Index.new(['F', 'M'])
pivoted

GnuplotRB::Plot.new(
  [pivoted['F'], with: 'lines', title: 'F'],
  [pivoted['M'], with: 'lines', title: 'M'],
  title: 'Total births by sex and year'
)

groups_by_year = data_frame.group_by(['year'])
nil

# For every name of interest, collect one row per year group and build a
# data frame out of those rows.
# NOTE(review): `index_of` is used to locate the name inside each group; if a
# name occurs more than once in a year (e.g. once per sex) presumably only
# the first occurrence is picked, and a name missing from a year is not
# handled - confirm both are acceptable for the chosen names.
pieces = ['James', 'Robert', 'Jessica', 'Sophia'].map do |name|
  rows = []
  groups_by_year.each_group do |group|
    rows << group.row[group['name'].index_of(name)]
  end
  Daru::DataFrame.rows(rows)
end

# Turn each per-name frame into a series indexed by year and named after the
# name it describes.
pieces.each do |df|
  df['year'].map! { |e| DateTime.new(e) }
  df.set_index('year')
  df.rename df['name'][0]
  df.delete_vector 'name'
end

# One line plot of births per name, with the x axis formatted as a year.
plots = pieces.map do |df|
  plot = GnuplotRB::Plot.new([df['births'], with: 'lines', title: df.name])
  plot.format_x = '%Y'
  plot
end

# Arrange the four per-name plots in a 2x2 grid with shared axis settings.
GnuplotRB::Multiplot.new(*plots).tap do |mp|
  mp.layout = [2, 2]
  mp.format_x = '%Y'
  mp.xtics = 'nomirror rotate by -45'
  mp.title = 'Prevalence of certain names according to year'
  mp.xlabel = 'Year'
  mp.ylabel = 'Occurrences' # spelling fixed (was 'Occurences')
  mp.xrange = '"1945-01-01":"2016-01-01"'
end