In this notebook we use the new Sunburst plot by plotly to illustrate how the World population is split among regions and countries. The data set illustrated here originates from the World Bank. This notebook is also a quick demo for the world_bank_data Python package.
import pandas as pd
import mock
import plotly.offline as offline
import world_bank_data as wb
try:
# Python 3.6
from urllib.request import urlopen
except ImportError:
# Python 2.7
from urllib import urlopen
# Keep rendered dataframes short: beyond 6 rows pandas elides the middle
# and only shows the head and tail
pd.options.display.max_rows = 6
# Plotly.js in version 1.46.1
def get_latest_plotlyjs(url='https://cdn.plot.ly/plotly-1.46.1.min.js'):
    """Download a plotly.js bundle and return its source as text.

    Args:
        url: Location of the JavaScript bundle. Defaults to the minified
            plotly.js 1.46.1 build on the plotly CDN.

    Returns:
        The response body decoded as UTF-8.
    """
    response = urlopen(url)
    try:
        return response.read().decode('utf-8')
    finally:
        # Always release the connection. try/finally is used instead of
        # `with` because the object returned by the Python 2.7 urlopen is
        # not a context manager.
        response.close()
# While the notebook mode is initialised, make plotly's offline module load
# its JavaScript through get_latest_plotlyjs instead of its own
# get_plotlyjs; the patch is undone as soon as the block exits.
plotlyjs_patch = mock.patch('plotly.offline.offline.get_plotlyjs',
                            get_latest_plotlyjs)
with plotlyjs_patch:
    offline.init_notebook_mode()
# Countries and associated regions, indexed by three-letter country code.
# The table also lists World Bank aggregates (e.g. AFR, "Africa"); those rows
# carry the region 'Aggregates'.
countries = wb.get_countries()
# Last expression of the cell: displays the table in the notebook
countries
iso2Code | name | region | adminregion | incomeLevel | lendingType | capitalCity | longitude | latitude | |
---|---|---|---|---|---|---|---|---|---|
id | |||||||||
ABW | AW | Aruba | Latin America & Caribbean | High income | Not classified | Oranjestad | -70.0167 | 12.5167 | |
AFG | AF | Afghanistan | South Asia | South Asia | Low income | IDA | Kabul | 69.1761 | 34.5228 |
AFR | A9 | Africa | Aggregates | Aggregates | Aggregates | NaN | NaN | ||
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
ZAF | ZA | South Africa | Sub-Saharan Africa | Sub-Saharan Africa (excluding high income) | Upper middle income | IBRD | Pretoria | 28.1871 | -25.7460 |
ZMB | ZM | Zambia | Sub-Saharan Africa | Sub-Saharan Africa (excluding high income) | Lower middle income | IDA | Lusaka | 28.2937 | -15.3982 |
ZWE | ZW | Zimbabwe | Sub-Saharan Africa | Sub-Saharan Africa (excluding high income) | Low income | Blend | Harare | 31.0672 | -17.8312 |
304 rows × 9 columns
# Population dataset, by the World Bank (most recent value).
# 'SP.POP.TOTL' is the "Population, total" indicator. With these arguments
# the resulting series is indexed by country name, series name and year.
population = wb.get_series('SP.POP.TOTL', mrv=1)
# Last expression of the cell: displays the series in the notebook
population
Country Series Year Arab World Population, total 2017 414491886.0 Caribbean small states Population, total 2017 7284294.0 Central Europe and the Baltics Population, total 2017 102727102.0 ... Yemen, Rep. Population, total 2017 28250420.0 Zambia Population, total 2017 17094130.0 Zimbabwe Population, total 2017 16529904.0 Name: SP.POP.TOTL, Length: 264, dtype: float64
# Same data set, indexed with the country code.
# id_or_value='id' labels countries by code rather than by name.
# NOTE(review): simplify_index=True appears to drop the index levels that hold
# a single value (Series, Year), judging from the displayed result — confirm
# against the world_bank_data documentation.
population = wb.get_series('SP.POP.TOTL', id_or_value='id', simplify_index=True, mrv=1)
# Last expression of the cell: displays the series in the notebook
population
Country ARB 414491886.0 CSS 7284294.0 CEB 102727102.0 ... YEM 28250420.0 ZMB 17094130.0 ZWE 16529904.0 Name: SP.POP.TOTL, Length: 264, dtype: float64
# Aggregate region, country and population.
# Rows whose region is 'Aggregates' are not countries and are left out.
is_country = countries['region'] != 'Aggregates'
df = countries.loc[is_country, ['region', 'name']]
df = df.rename(columns={'name': 'country'})
# Both objects are indexed by country code, so the assignment aligns each
# population figure with its country
df['population'] = population
df
region | country | population | |
---|---|---|---|
id | |||
ABW | Latin America & Caribbean | Aruba | 105264.0 |
AFG | South Asia | Afghanistan | 35530081.0 |
AGO | Sub-Saharan Africa | Angola | 29784193.0 |
... | ... | ... | ... |
ZAF | Sub-Saharan Africa | South Africa | 56717156.0 |
ZMB | Sub-Saharan Africa | Zambia | 17094130.0 |
ZWE | Sub-Saharan Africa | Zimbabwe | 16529904.0 |
218 rows × 3 columns
# The sunburst plot requires weights (values), labels, and parent (region, or World)
# We build the corresponding table here, one level of the hierarchy at a time
columns = ['parents', 'labels', 'values']
# Populations are shown as whole numbers with thousands separators
format_population = '{:,.0f}'.format

# Countries: region -> parents, country -> labels, population -> values
level1 = df.copy()
level1.columns = columns
level1['text'] = level1['values'].apply(format_population)

# Regions: one row per region, all attached to the 'World' root
level2 = df.groupby('region')['population'].sum().reset_index()
level2 = level2.rename(columns={'region': 'labels', 'population': 'values'})
level2.insert(0, 'parents', 'World')
# move value to text for this level
level2['text'] = level2['values'].apply(format_population)
level2['values'] = 0

# Root: the World itself, with no parent; its total is only kept as text
world_total = population.loc['WLD']
level3 = pd.DataFrame({'parents': [''], 'labels': ['World'],
                       'values': [0.0], 'text': [format_population(world_total)]})

# Stack the three levels and renumber the rows from 0
all_levels = pd.concat([level1, level2, level3], ignore_index=True)
all_levels
parents | labels | values | text | |
---|---|---|---|---|
0 | Latin America & Caribbean | Aruba | 105264.0 | 105,264 |
1 | South Asia | Afghanistan | 35530081.0 | 35,530,081 |
2 | Sub-Saharan Africa | Angola | 29784193.0 | 29,784,193 |
... | ... | ... | ... | ... |
223 | World | South Asia | 0.0 | 1,788,388,852 |
224 | World | Sub-Saharan Africa | 0.0 | 1,056,038,890 |
225 | | World | 0.0 | 7,530,360,149 |
226 rows × 4 columns
# And now we can plot the World Population
# Each column of all_levels (parents, labels, values, text) becomes an
# attribute of the sunburst trace; hovering a sector shows its text.
sunburst_trace = dict(type='sunburst', hoverinfo='text', **all_levels)
figure_layout = dict(
    title='World Population (World Bank, 2017)<br>Click on a region to zoom',
    width=800,
    height=800)
# NOTE(review): validate=False is presumably needed because the installed
# plotly Python package does not know the 'sunburst' trace type — confirm.
offline.iplot(dict(data=[sunburst_trace], layout=figure_layout),
              validate=False)