#!/usr/bin/env python
# coding: utf-8

# # Measles Incidence in Altair

# This is an example of reproducing the Wall Street Journal's famous [Measles Incidence Plot](http://graphics.wsj.com/infectious-diseases-and-vaccines/#b02g20t20w15) in Python using [Altair](http://github.com/ellisonbg/altair/).

# ## The Data
#
# We'll start by downloading the data. Fortunately, others have made the data available in an easily digestible form; a github search revealed the dataset in CSV format here:

# In[1]:


import pandas as pd

url = 'https://raw.githubusercontent.com/blmoore/blogR/master/data/measles_incidence.csv'
# Skip the first two lines of the file and read '-' cells as missing values.
data = pd.read_csv(url, skiprows=2, na_values='-')
data.head()


# ## Data Munging with Pandas

# This data needs to be cleaned up a bit; we can do this with the Pandas library.
# We first need to aggregate the incidence data by year:

# In[2]:


# Drop the WEEK column, then sum every remaining column within each YEAR.
annual = data.drop('WEEK', axis=1).groupby('YEAR').sum()
annual.head()


# Next, because Altair is built to handle data where each row corresponds to a single sample, we will stack the data, re-labeling the columns for clarity:

# In[3]:


# Turn YEAR back into a regular column, then melt the remaining columns
# into long form: one (YEAR, state, incidence) row per original cell.
measles = annual.reset_index()
measles = measles.melt('YEAR', var_name='state', value_name='incidence')
measles.head()


# ## Initial Visualization

# Now we can use Altair's syntax for generating a heat map:

# In[4]:


import altair as alt


# In[5]:


alt.Chart(measles).mark_rect().encode(
    x='YEAR:O',
    y='state:N',
    color='incidence'
).properties(
    width=600,
    height=400
)


# ## Adjusting Aesthetics

# All operative components of the visualization appear above; we now just have to adjust the aesthetic features to reproduce the original plot.
# Altair allows a wide range of flexibility for such adjustments, including size and color of markings, axis labels and titles, and more.
#
# Here is the data visualized again with a number of these adjustments:

# In[6]:


# Define a custom colormap using hex codes & HTML color names
colormap = alt.Scale(domain=[0, 100, 200, 300, 1000, 3000],
                     range=['#F0F8FF', 'cornflowerblue', 'mediumseagreen',
                            '#FFEE00', 'darkorange', 'firebrick'],
                     type='sqrt')

alt.Chart(measles).mark_rect().encode(
    alt.X('YEAR:O', axis=alt.Axis(title=None, ticks=False)),
    alt.Y('state:N', axis=alt.Axis(title=None, ticks=False)),
    alt.Color('incidence:Q', sort='ascending', scale=colormap, legend=None)
).properties(
    width=800,
    height=500
)


# The result clearly shows the impact of the measles vaccine introduced in the mid-1960s.

# ## Layering & Selections
#
# Here is another view of the data, using layering and selections to allow zooming in.

# In[7]:


# Hover selection keyed on `state`; an empty selection matches nothing.
# NOTE(review): the selection API appears to have changed across Altair
# releases -- confirm against the installed version. The newer entry point
# is used when this Altair build provides it; otherwise the original call
# is made unchanged.
if hasattr(alt, 'selection_point'):
    hover = alt.selection_point(on='mouseover', nearest=True,
                                fields=['state'], empty=False)
else:
    hover = alt.selection_single(on='mouseover', nearest=True,
                                 fields=['state'], empty='none')

# One line per state; a line is fully opaque only while its state is hovered.
line = alt.Chart().mark_line().encode(
    alt.X('YEAR:Q',
          scale=alt.Scale(zero=False),
          axis=alt.Axis(format='f', title='year')),
    alt.Y('incidence:Q', axis=alt.Axis(title='measles incidence')),
    detail='state:N',
    opacity=alt.condition(hover, alt.value(1.0), alt.value(0.1))
).properties(
    width=800,
    height=300
)

# Fully transparent point marks with the same encodings as `line`; this is
# the layer the hover selection is attached to.
point = line.mark_point().encode(
    opacity=alt.value(0.0)
)
# Attach the selection with whichever method this Altair build exposes,
# newest first; the last branch is the original call.
if hasattr(point, 'add_params'):
    point = point.add_params(hover)
elif hasattr(point, 'add_selection'):
    point = point.add_selection(hover)
else:
    point = point.properties(selection=hover)

# Mean incidence per year (no per-state grouping), drawn in black.
mean = alt.Chart().mark_line().encode(
    x=alt.X('YEAR:Q', scale=alt.Scale(zero=False)),
    y='mean(incidence):Q',
    color=alt.value('black')
)

# State-name label, visible only for the hovered state.
text = alt.Chart().mark_text(align='right').encode(
    x='min(YEAR):Q',
    y='mean(incidence):Q',
    text='state:N',
    detail='state:N',
    opacity=alt.condition(hover, alt.value(1.0), alt.value(0.0))
)

# All four layers share the `measles` frame; zoom/pan is bound to x only.
alt.layer(point, line, mean, text, data=measles).interactive(bind_y=False)