import plotly.plotly as py
import plotly.tools as tls
import pandas as pd
import json
import urllib2
import re
The Gapminder data is most famously featured in Hans Rosling's bubble charts (see a plotly version here).
In this notebook, we'll explore the same dataset using plotly choropleths.
# Gapminder five-year data, tab-separated -- big thanks to Jennifer Bryan!
df_full = pd.read_csv(
    'http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt',
    delimiter='\t'
)
# Preview the first rows
df_full.head()
 | country | year | pop | continent | lifeExp | gdpPercap |
---|---|---|---|---|---|---|
0 | Afghanistan | 1952 | 8425333 | Asia | 28.801 | 779.445314 |
1 | Afghanistan | 1957 | 9240934 | Asia | 30.332 | 820.853030 |
2 | Afghanistan | 1962 | 10267083 | Asia | 31.997 | 853.100710 |
3 | Afghanistan | 1967 | 11537966 | Asia | 34.020 | 836.197138 |
4 | Afghanistan | 1972 | 13079460 | Asia | 36.088 | 739.981106 |
5 rows × 6 columns
Select a year and truncate the dataframe
# Year to map; list the available ones with df_full['year'].unique()
the_year = 2007

# Boolean mask that is True on the rows recorded for 'the_year'
i_year = df_full['year'].eq(the_year)

# Keep only the rows corresponding to 'the_year'
df = df_full.loc[i_year]
Define a plot function
def plot(z, title, units):
    """Send a world choropleth of `z` to plotly and print the resulting URL.

    Country names are read from the module-level `df['country']`, and the
    year shown in the figure title from the module-level `the_year`.

    Args:
        z: values used to colour the countries (passed through as the
            trace's `z`; presumably aligned with `df['country']`).
        title: name of the plotted quantity. Used in the figure title and,
            lower-cased with spaces replaced by dashes, as the filename.
        units: unit label for the colorbar ticks. '$' is attached as a tick
            prefix; any other value is attached as a tick suffix.
    """
    tick_label = ' ' + units

    # The unit goes in front of the number for dollars, after it otherwise;
    # 'last' restricts the label to the last tick of the colorbar.
    if units == '$':
        colorbar = {'tickprefix': tick_label, 'showtickprefix': 'last'}
    else:
        colorbar = {'ticksuffix': tick_label, 'showticksuffix': 'last'}

    trace = {
        'type': 'choropleth',
        'locationmode': 'country names',
        'locations': df['country'],
        'z': z,
        'colorbar': colorbar,
    }

    layout = {
        'title': title + ' by country in ' + str(the_year),
        'titlefont': {'size': 22},
        'geo': {'projection': {'type': 'kavrayskiy7'}},
        'width': 1000,
        'height': 600,
    }

    figure = {'data': [trace], 'layout': layout}

    url = py.plot(
        figure,
        validate=False,
        filename=title.lower().replace(' ', '-'),
        auto_open=False
    )
    print(url)
Use the plot function for each of the three dependent variables
# 'pop' holds raw head counts (e.g. 8425333), so scale it to millions to
# agree with the 'million' unit shown on the colorbar.
plot(df['pop'] / 1e6, 'World population', 'million')
plot(df['lifeExp'], 'Life expectancy', 'year')
plot(df['gdpPercap'], 'GDP per capita', '$')
https://plot.ly/~etpinard/4250 https://plot.ly/~etpinard/4252 https://plot.ly/~etpinard/4254
# Embed the first URL printed by the plot() calls
# (presumably the world population map, going by call order).
tls.embed('https://plot.ly/~etpinard/4250')
Note that Russia is not part of the dataset.
# Embed the second and third URLs printed by the plot() calls
# (presumably life expectancy, then GDP per capita, going by call order).
tls.embed('https://plot.ly/~etpinard/4252')
tls.embed('https://plot.ly/~etpinard/4254')
# Inject CSS styling in the NB
from IPython.display import display, HTML

# Read the stylesheet inside a context manager so the file handle is closed
# as soon as its contents have been loaded.
with open('../_custom.css') as css_file:
    display(HTML(css_file.read()))