import pandas as pd
%%time
# Load the national and the per-state name tables from CSV.
# index_col=0 makes the first CSV column the row index of each frame.
national_names_df = pd.read_csv('./../data/Names/NationalNames.csv', index_col=0)
state_names_df = pd.read_csv('./../data/Names/StateNames.csv', index_col=0)
CPU times: user 11.1 s, sys: 915 ms, total: 12 s Wall time: 12.7 s
# Restrict the state-level table to rows with Year == 2000 and Gender == 'F',
# drop the two filter columns, then reshape: one row per state, one column
# per name, cells holding the Count aggregated with pivot_table's default.
_is_year_2000 = state_names_df['Year'] == 2000
_is_female = state_names_df['Gender'] == 'F'
_female_2000_df = state_names_df.loc[_is_year_2000 & _is_female]
_female_2000_df = _female_2000_df.drop(['Year', 'Gender'], axis=1)
yearly_female_state_names_df = _female_2000_df.pivot_table(
    index='State',
    columns='Name',
    values='Count',
)
def f(row):
    """Return the largest entry of a row as a ``(label, value)`` tuple.

    Args:
        row: A pandas Series (here: one state's counts, indexed by name).

    Returns:
        A tuple ``(index label of the maximum, maximum value)``.
    """
    # idxmax() returns the index *label* of the maximum. argmax() returns an
    # integer position in current pandas, which would put a number where the
    # callers expect a name.
    return (row.idxmax(), row.max())
# Apply f to every state row; each element of the result is the tuple f returned.
_yearly_top_names_s = yearly_female_state_names_df.apply(f, axis=1)

# Split the tuples into their first and second items, stack the two Series as
# rows of a frame and transpose so that each state becomes a row again.
_top_name_col = _yearly_top_names_s.apply(lambda pair: pair[0])
_top_count_col = _yearly_top_names_s.apply(lambda pair: pair[1])
yearly_top_names_df = pd.DataFrame([_top_name_col, _top_count_col]).T
yearly_top_names_df.columns = ['Name', 'Count']
%matplotlib inline
import matplotlib.pyplot as plt
# Bar chart of the 'Count' column, one bar per row of yearly_top_names_df,
# with the matching 'Name' written at the height of each bar.
df = yearly_top_names_df
fig, axs = plt.subplots(1, 1, figsize=(16, 10))
ax = axs
yearly_top_names_df['Count'].plot.bar(ax=ax)
for i, (name, count) in enumerate(zip(df['Name'], df['Count'])):
    # Bars sit at integer x positions; shift the label half a unit to the left.
    ax.text(i - 0.5, count, name)
# Summed Count per (Year, Name) over the whole state-level table: rows are
# years, columns are names, and combinations without any record become 0.
# The aggregation is given by name ('sum'); passing the builtin `sum` is
# deprecated in recent pandas. Pivoting directly into the year-by-name layout
# avoids the extra transpose and level selection.
time_series_names_df = state_names_df.pivot_table(
    index='Year',
    columns='Name',
    values='Count',
    aggfunc='sum',
).fillna(0)

fig, axs = plt.subplots(1, 1, figsize=(16, 10))
ax = axs
# Adding up all name columns gives one total per year.
time_series_names_df.sum(axis=1).plot(ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x10878a5d0>
import geopandas as gpd
import numpy as np
def getXYCoords(geometry, coord_type):
    """Return the x or y coordinates of a geometry's coordinate sequence.

    Intended for geometries exposing ``coords.xy`` (LineString, Polygon rings).
    ``coord_type`` is ``'x'`` or ``'y'``; any other value yields ``None``.
    """
    if coord_type not in ('x', 'y'):
        return None
    # coords.xy is an (x values, y values) pair.
    position = 0 if coord_type == 'x' else 1
    return geometry.coords.xy[position]
def getPolyCoords(geometry, coord_type):
    """Return the x or y coordinates of a Polygon, taken from its exterior."""
    return getXYCoords(geometry.exterior, coord_type)
def getLineCoords(geometry, coord_type):
    """Return the x or y coordinates of a LineString object."""
    line_coords = getXYCoords(geometry, coord_type)
    return line_coords
def getPointCoords(geometry, coord_type):
    """Return the x or y coordinate of a Point object.

    ``coord_type`` is ``'x'`` or ``'y'``; any other value yields ``None``.
    """
    if coord_type == 'x':
        return geometry.x
    return geometry.y if coord_type == 'y' else None
def multiGeomHandler(multi_geometry, coord_type, geom_type):
    """Flatten the coordinates of a multi-part geometry into one array.

    Handles MultiPoint, MultiLineString and MultiPolygon. The coordinates of
    every part are merged into a single array, and each part is followed by
    ``np.nan``, which is how Bokeh wants individual geometries separated.
    Bokeh's handling of multi-geometries is tracked in an open issue:
    https://github.com/bokeh/bokeh/issues/2321

    Args:
        multi_geometry: The multi-part geometry. Its parts are read from the
            ``geoms`` attribute when present, otherwise by iterating the
            object itself.
        coord_type: ``'x'`` or ``'y'``.
        geom_type: ``"MultiPoint"``, ``"MultiLineString"`` or ``"MultiPolygon"``.

    Returns:
        A numpy array of coordinates with a NaN after every part; an empty
        array when the geometry has no parts.

    Raises:
        ValueError: If ``geom_type`` is not one of the supported types.
    """
    extractors = {
        "MultiPoint": getPointCoords,
        "MultiLineString": getLineCoords,
        "MultiPolygon": getPolyCoords,
    }
    if geom_type not in extractors:
        raise ValueError(
            "Unsupported multi-geometry type: {!r}".format(geom_type)
        )
    extract = extractors[geom_type]

    # Shapely 2.x multi-part geometries are not iterable; their parts live in
    # `.geoms`. Fall back to plain iteration for objects without it.
    parts = getattr(multi_geometry, "geoms", multi_geometry)

    # Collect all NaN-terminated pieces first and join them once, instead of
    # re-copying the growing array for every part.
    segments = [np.append(extract(part, coord_type), np.nan) for part in parts]
    if not segments:
        return np.array([], dtype=float)
    return np.concatenate(segments)
def getCoords(row, geom_col, coord_type):
    """Return the 'x' or 'y' coordinates of the geometry stored in a row.

    ``row[geom_col]`` is the geometry. Point results are returned as-is;
    LineString and Polygon results are converted to a list. Every other
    geometry type is passed to the multi-geometry handler and its result is
    converted to a list as well.
    """
    shape = row[geom_col]
    kind = shape.geom_type

    # Single-part geometries.
    if kind == "Point":
        return getPointCoords(shape, coord_type)
    if kind == "LineString":
        return list(getLineCoords(shape, coord_type))
    if kind == "Polygon":
        return list(getPolyCoords(shape, coord_type))

    # Everything else is treated as a multi-part geometry.
    return list(multiGeomHandler(shape, coord_type, kind))
# Read the state shapes and re-project them onto the CRS the file already
# declares.
data = gpd.read_file('../data/states_21basic/states.shp')
data = data.to_crs(crs=data.crs)

# Derive plain 'x' and 'y' coordinate columns from each row's geometry; after
# that the geometry column itself is dropped.
for _axis in ('x', 'y'):
    data[_axis] = data.apply(getCoords, geom_col="geometry", coord_type=_axis, axis=1)
data = data.drop('geometry', axis=1)
from __future__ import division
from bokeh.io import show, output_notebook, push_notebook
# Configure Bokeh to display its output inside the notebook.
output_notebook()
# Base width for the Bokeh figure.
WIDTH=600
from __future__ import print_function
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
def update(gender, slider):
    """Refresh the state map for the selected gender and year.

    Finds, for every state, the name with the highest count among the rows of
    the module-level ``state_names_df`` matching ``gender`` and ``slider``,
    then writes the result into the existing Bokeh objects.

    Args:
        gender: Value compared against the 'Gender' column.
        slider: Value compared against the 'Year' column.

    Side effects:
        Sets ``color_mapper.low`` / ``color_mapper.high``, replaces the 'Name'
        and 'Count' columns of ``patches.data_source`` and calls
        ``push_notebook()``. Nothing is changed when no rows match.
    """
    selected = state_names_df.loc[
        (state_names_df['Year'] == slider)
        & (state_names_df['Gender'] == gender)
    ]
    if selected.empty:
        # No records for this combination: keep the current plot instead of
        # failing on the NaN minimum/maximum below.
        return

    counts_by_state = selected.drop(['Year', 'Gender'], axis=1).pivot_table(
        index='State',
        values='Count',
        columns='Name',
    )

    # idxmax gives the column *label* (the name) of the row maximum; argmax
    # would give an integer position in current pandas.
    yearly_top_names_df = pd.DataFrame(
        {
            'Name': counts_by_state.idxmax(axis=1),
            'Count': counts_by_state.max(axis=1),
        },
        columns=['Name', 'Count'],
    )

    # The colour scale spans the smallest to the largest top count. The lower
    # bound must be the minimum, not the maximum.
    color_mapper.low = int(float(yearly_top_names_df['Count'].min()))
    color_mapper.high = int(float(yearly_top_names_df['Count'].max()))

    # Align the new values with the order of the plotted states. reindex
    # yields NaN for a state without data instead of raising.
    plotted_states = list(patches.data_source.data['STATE_ABBR'])
    aligned = yearly_top_names_df.reindex(plotted_states)
    patches.data_source.data['Name'] = list(aligned['Name'])
    patches.data_source.data['Count'] = list(aligned['Count'])
    push_notebook()
from bokeh.models import (
ColumnDataSource,
HoverTool,
LogColorMapper, ColorBar, LogTicker, LinearColorMapper,
)
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure
# Use the palette in reverse order. A reversed copy works whether the palette
# is a list or a tuple and does not mutate the object imported from
# bokeh.palettes.
palette = list(reversed(palette))

color_mapper = LogColorMapper(palette=palette)

# Attach the top name and count of each state to its shape, then select only
# the columns needed for plotting to keep the amount of data to a minimum.
plot_data = pd.merge(data, yearly_top_names_df, left_on='STATE_ABBR', right_index=True)
df = plot_data[['x', 'y', 'Count', 'Name', 'STATE_ABBR']]
dfsource = ColumnDataSource(data=df)

TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

p = figure(width=int(WIDTH), height=int(WIDTH/1.5),
           title="", tools=TOOLS,
           x_axis_location=None, y_axis_location=None
           )
p.grid.grid_line_color = None

# One patch per row of the source, filled according to its 'Count'.
patches = p.patches('x', 'y',
                    source=dfsource, name='Name',
                    fill_color={'field': 'Count', 'transform': color_mapper},
                    fill_alpha=0.7, line_color="white", line_width=0.5)

color_bar = ColorBar(color_mapper=color_mapper, border_line_color=None, location=(0,0))
p.add_layout(color_bar, 'left')

hover = HoverTool(renderers=[patches])
hover.tooltips=[("Count", "@Count"),
                ("Name", "@Name"),]
p.add_tools(hover)

# Keep the handle so the displayed plot can be updated in place.
h = show(p, notebook_handle=True)

# Gender dropdown and year slider driving update(). interact_manual is the
# public way to run the callback only when the button is pressed; a
# `__manual=True` keyword is not an option of interact().
_years = state_names_df['Year']
widgets.interact_manual(
    update,
    gender=['M', 'F'],
    slider=widgets.IntSlider(min=int(_years.min()),
                             max=int(_years.max()),
                             step=1),
);