import plotly.plotly as py
import json
import urllib2
from bs4 import BeautifulSoup
from IPython.display import IFrame
# Get the data from the Wikipedia page listing U.S. states and territories by population.
# Wikipedia article that is scraped for the per-state figures.
page_url = 'https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population'

# Inline preview of the page (width 950, height 500). The object is not
# assigned, so it is only visible where expression results are displayed,
# e.g. as the last expression of a notebook cell.
IFrame(page_url, 950, 500)

# Download and parse the page. The parser is named explicitly so the
# resulting tree does not depend on which optional parsers are installed,
# and the HTTP response is closed even if parsing fails.
response = urllib2.urlopen(page_url)
try:
    soup = BeautifulSoup(response, 'html.parser')
finally:
    response.close()

# Parallel lists filled by the table-scraping loop further down:
# one entry per accepted table row.
states = []
values = []
ranks = []
# N.B. the data of interest is in the first table of the page;
# state names are in the third column and
# the values of interest are in the fourth column.
def parse_float(x):
    """Convert a number string with comma thousands separators to a float.

    Commas are stripped before conversion, so ``'1,234.5'`` becomes
    ``1234.5``.

    Args:
        x: Text to convert. Must offer a string-like ``replace`` method.

    Returns:
        The parsed float, or ``False`` when ``x`` cannot be converted.
        A parsed ``0.0`` is also falsy, so callers that test the result
        with ``if value:`` treat zero the same as a failed parse.
    """
    try:
        return float(x.replace(',', ''))
    except (AttributeError, TypeError, ValueError):
        # Non-string input or non-numeric text: keep the ``False`` sentinel
        # instead of raising, but no longer swallow unrelated exceptions
        # such as KeyboardInterrupt.
        return False
# Walk every row of the first table on the page. The row index from
# enumerate() is what gets stored in ``ranks``.
for i, row in enumerate(soup.find_all('table')[0].find_all('tr')):
    tds = row.find_all('td')
    # Skip rows without <td> cells and rows too short to hold both the
    # state name (third cell) and the value (fourth cell); indexing them
    # would raise IndexError.
    if len(tds) < 4:
        continue
    anchors = tds[2].find_all('a')
    # The state name is the first child of the first link in the third cell;
    # skip rows where the link or the value cell has no content.
    if not anchors or not anchors[0].contents or not tds[3].contents:
        continue
    value = parse_float(tds[3].contents[0])
    # parse_float returns False when the cell is not numeric; such rows
    # are left out of all three lists so they stay aligned.
    if value:
        states.append(anchors[0].contents[0])
        values.append(value)
        ranks.append(i)
# Plotly's `USA-states` location mode understands two-letter abbreviations, so we need to
# convert the full state names that we scraped from the Wikipedia table.
# JSON list of {name, abbreviation} records used to map full state names
# to their two-letter codes.
states_titlecase_url = 'https://gist.githubusercontent.com/mshafrir/2646763/raw/8b0dbb93521f5d6889502305335104218454c2bf/states_titlecase.json'

# Close the HTTP response even if the payload is not valid JSON.
response = urllib2.urlopen(states_titlecase_url)
try:
    states_titlecase = json.load(response)
finally:
    response.close()

states_titlecase[0]  # one item
# {u'abbreviation': u'AL', u'name': u'Alabama'}
# Inputs for the choropleth trace, built in the order of ``states_titlecase``:
# two-letter codes, plotted values, hover labels and the matched full names.
locations, z, hovertext, names = [], [], [], []

# Pair every scraped state with its value and rank once, up front.
scraped_rows = list(zip(states, values, ranks))

for s in states_titlecase:
    # Names are compared case-insensitively.
    wanted_name = s['name'].lower()
    for state, value, rank in scraped_rows:
        if wanted_name != state.lower():
            continue
        locations.append(s['abbreviation'])
        z.append(value)
        hovertext.append(u'<b>Rank:</b> {0}<br>{1}'.format(rank, state))
        names.append(state)
# One choropleth trace keyed by two-letter state codes.
trace = {
    'type': 'choropleth',
    'locationmode': 'USA-states',
    'locations': locations,
    'z': z,
    'text': hovertext,
    'colorscale': 'Viridis',
}

# Fixed-size figure restricted to the USA map scope.
layout = {
    'title': 'USA states population in 2014',
    'titlefont': {'size': 24},
    'geo': {
        'scope': 'usa',
        'projection': {},
    },
    'autosize': False,
    'width': 800,
    'height': 580,
}

figure = {'data': [trace], 'layout': layout}

# Send the figure to Plotly under a fixed filename, with validation disabled.
py.iplot(figure, validate=False, filename='usa-states-population')
# Inject CSS styling in the NB
from IPython.display import display, HTML

# Read the stylesheet inside a ``with`` block so the file handle is closed
# as soon as its contents have been loaded.
with open('../_custom.css') as css_file:
    custom_css = css_file.read()

display(HTML(custom_css))