from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
# --- Census estimates: load, trim, and split by sex -----------------------
# Read the raw estimates file and preview its first 30 rows.
full = Table.read_table('nc-est2014-agesex-res.csv')
full.show(30)

# Keep only the sex/age keys and the two yearly estimates used below.
partial = full.select(
    'SEX',
    'AGE',
    'POPESTIMATE2010',
    'POPESTIMATE2014',
)
partial

# Shorten the two estimate labels to just their year.
simple = partial.relabeled(
    ['POPESTIMATE2010', 'POPESTIMATE2014'],
    ['2010', '2014'],
)
simple

# Look at the largest AGE values first; the result is only displayed.
simple.sort('AGE', descending=True)

# Drop rows whose AGE is 999.
# NOTE(review): 999 is presumably a sentinel for an all-ages total row --
# confirm against the dataset's documentation.
no_999 = simple.where('AGE', are.below(999))
no_999

# One table per SEX code (0 -> everyone, 1 -> males, 2 -> females), with
# the now-constant SEX column removed from each.
everyone, males, females = (
    no_999.where('SEX', are.equal_to(sex_code)).drop('SEX')
    for sex_code in (0, 1, 2)
)
females

# Largest 2014 estimates first. `sort` returns a new table, so `females`
# and `males` themselves keep their original row order.
females.sort('2014', descending=True)
males.sort('2014', descending=True)
# --- 2014 estimates for males and females, side by side -------------------
# The three columns are paired up positionally, so this relies on `males`
# and `females` listing ages in the same row order.
pop_2014 = Table().with_columns(
    'Age', males.column('AGE'),
    'Males', males.column('2014'),
    'Females', females.column('2014'),
)
pop_2014

# Females as a percentage of the combined male + female estimate per row.
male_counts = pop_2014.column('Males')
female_counts = pop_2014.column('Females')
percent_females = 100 * female_counts / (male_counts + female_counts)

counts_and_percents = pop_2014.with_column('Percent Female', percent_females)
counts_and_percents
counts_and_percents.plot('Age', 'Percent Female')

# Line plot of every remaining column (Males, Females) against Age.
pop_2014
pop_2014.plot('Age')

# Rows for ages 65 through 74 (`are.between` excludes its upper bound).
pop_2014.where('Age', are.between(65, 75))

# 2014 minus each age from 67 through 72, i.e. the approximate birth years
# of people who were those ages in 2014.
2014 - np.arange(67, 73)
# --- Change and growth between the 2010 and 2014 estimates ----------------
everyone

# Absolute change per row: 2014 estimate minus 2010 estimate.
population_change = everyone.column('2014') - everyone.column('2010')
everyone = everyone.with_columns('Change', population_change)
everyone.sort('Change', descending=True)

# Annualized growth rate: the 2014/2010 ratio spread evenly over the four
# years between the estimates, (ratio ** (1/4)) - 1. Only displayed, not
# stored back into `everyone`.
annual_growth = (everyone.column('2014') / everyone.column('2010')) ** (1 / 4) - 1
everyone.with_columns(
    'Growth Rate', annual_growth,
).sort('Growth Rate', descending=True)
# --- Actors: number of movies vs. gross -----------------------------------
actors = Table.read_table('actors.csv')
actors

# Scatter each gross measure against the number of movies, one plot apiece.
for y_label in ('Total Gross', 'Average per Movie'):
    actors.scatter('Number of Movies', y_label)

# Rows whose 'Average per Movie' exceeds 150.
# NOTE(review): the unit of this column is not shown here -- confirm
# against the CSV.
actors.where('Average per Movie', are.above(150))