import plotly.plotly as py
import pandas as pd
# URL of the Gapminder five-year dataset (tab-separated text). Thanks Jennifer Bryan!
url_csv = 'http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt'
# Read the remote file into a DataFrame; `delimiter` is pandas' alias for `sep`.
df = pd.read_csv(url_csv, delimiter='\t')
# Preview the first rows (shown as the cell output in a notebook).
df.head()
| | country | year | pop | continent | lifeExp | gdpPercap |
---|---|---|---|---|---|---|
0 | Afghanistan | 1952 | 8425333 | Asia | 28.801 | 779.445314 |
1 | Afghanistan | 1957 | 9240934 | Asia | 30.332 | 820.853030 |
2 | Afghanistan | 1962 | 10267083 | Asia | 31.997 | 853.100710 |
3 | Afghanistan | 1967 | 11537966 | Asia | 34.020 | 836.197138 |
4 | Afghanistan | 1972 | 13079460 | Asia | 36.088 | 739.981106 |
5 rows × 6 columns
Let's plot population as a function of year for a few selected countries:
# Countries to draw, and one fill colour per ribbon.
countries = ['China', 'India', 'United States', 'Bangladesh', 'South Africa']
fill_colors = ['#66c2a5', '#fc8d62', '#8da0cb', '#e78ac3', '#a6d854']

# Group the rows by country so each country's series can be pulled out by name.
gf = df.groupby('country')

data = []
# Walk the countries in reverse list order, pairing each with the next colour.
for country, fill_color in zip(reversed(countries), fill_colors):
    group = gf.get_group(country)
    years = group['year'].tolist()
    population = group['pop'].tolist()
    n_points = len(years)

    # Build a closed outline: along the population curve with years increasing,
    # back along z = 0 with years decreasing, then return to the first point.
    outline_x = years + years[::-1] + [years[0]]
    outline_y = [country] * (2 * n_points) + [country]  # constant y: one "lane" per country
    outline_z = population + [0] * n_points + [population[0]]

    trace = {
        'type': 'scatter3d',
        'mode': 'lines',
        'x': outline_x,
        'y': outline_y,
        'z': outline_z,
        'name': '',
        # add a surface axis ('1' refers to axes[1] i.e. the y-axis)
        'surfaceaxis': 1,
        'surfacecolor': fill_color,
        'line': {
            'color': 'black',
            'width': 4,
        },
    }
    data.append(trace)
# Figure layout: chart title, hidden legend, untitled 3D axes and the camera position.
layout = dict(
    # The dataset's first observation year is 1952 and every year of each
    # selected country is plotted, so the title states 1952 as the start.
    title='Population from 1952 to 2007 [Gapminder]',
    showlegend=False,
    scene=dict(
        # Axis titles are deliberately left blank.
        xaxis=dict(title=''),
        yaxis=dict(title=''),
        zaxis=dict(title=''),
        # Camera eye position for the initial view of the scene.
        camera=dict(
            eye=dict(x=-1.7, y=-1.7, z=0.5)
        )
    )
)
# Bundle the traces and the layout into a plain figure dict and hand it to
# plotly's iplot under the file name 'filled-3d-lines'.
# NOTE(review): validate=False presumably skips plotly's figure validation
# (the figure is a raw dict) - confirm against the plotly documentation.
fig = {'data': data, 'layout': layout}
py.iplot(fig, filename='filled-3d-lines', validate=False)