#!/usr/bin/env python
# coding: utf-8

# # Breweries in the United States in 2011

# In[1]:


from IPython.display import HTML

HTML('''
''')


# The following shows the number of breweries in the United States in 2011
# per state, along with breweries per capita per state and breweries per sq mi
# per state. Population data was taken from the 2010 census.

# In[2]:


import pandas as pd

# State name -> 2010 population, read from the '2010_POPULATION' column.
population = pd.read_csv(
    './../data/pop_density.csv', skiprows=3, index_col=0
)['2010_POPULATION'].to_dict()


# In[3]:


# One record per line: "<state name> <rank> <area> sq mi <area> sq km".
area_string = '''Alaska 1 663,267 sq mi 1,717,854 sq km
Texas 2 268,580 sq mi 695,621 sq km
California 3 163,695 sq mi 423,970 sq km
Montana 4 147,042 sq mi 380,838 sq km
New Mexico 5 121,589 sq mi 314,915 sq km
Arizona 6 113,998 sq mi 295,254 sq km
Nevada 7 110,560 sq mi 286,351 sq km
Colorado 8 104,093 sq mi 269,601 sq km
Oregon 9 98,380 sq mi 254,805 sq km
Wyoming 10 97,813 sq mi 253,336 sq km
Michigan 11 96,716 sq mi 250,494 sq km
Minnesota 12 86,938 sq mi 225,171 sq km
Utah 13 84,898 sq mi 219,887 sq km
Idaho 14 83,570 sq mi 216,446 sq km
Kansas 15 82,276 sq mi 213,096 sq km
Nebraska 16 77,358 sq mi 200,356 sq km
South Dakota 17 77,121 sq mi 199,742 sq km
Washington 18 71,300 sq mi 184,665 sq km
North Dakota 19 70,700 sq mi 183,112 sq km
Oklahoma 20 69,899 sq mi 181,035 sq km
Missouri 21 69,704 sq mi 180,533 sq km
Florida 22 65,755 sq mi 170,304 sq km
Wisconsin 23 65,498 sq mi 169,639 sq km
Georgia 24 59,425 sq mi 153,909 sq km
Illinois 25 57,914 sq mi 149,998 sq km
Iowa 26 56,271 sq mi 145,743 sq km
New York 27 54,556 sq mi 141,299 sq km
North Carolina 28 53,818 sq mi 139,389 sq km
Arkansas 29 53,179 sq mi 137,732 sq km
Alabama 30 52,419 sq mi 135,765 sq km
Louisiana 31 51,840 sq mi 134,264 sq km
Mississippi 32 48,431 sq mi 125,434 sq km
Pennsylvania 33 46,056 sq mi 119,283 sq km
Ohio 34 44,825 sq mi 116,096 sq km
Virginia 35 42,774 sq mi 110,785 sq km
Tennessee 36 42,144 sq mi 109,151 sq km
Kentucky 37 40,410 sq mi 104,659 sq km
Indiana 38 36,418 sq mi 94,321 sq km
Maine 39 35,385 sq mi 91,646 sq km
South Carolina 40 32,020 sq mi 82,932 sq km
West Virginia 41 24,230 sq mi 62,755 sq km
Maryland 42 12,407 sq mi 32,133 sq km
Hawaii 43 10,931 sq mi 28,311 sq km
Massachusetts 44 10,555 sq mi 27,336 sq km
Vermont 45 9,615 sq mi 24,901 sq km
New Hampshire 46 9,350 sq mi 24,216 sq km
New Jersey 47 8,722 sq mi 22,588 sq km
Connecticut 48 5,544 sq mi 14,357 sq km
Delaware 49 2,489 sq mi 6,447 sq km
Rhode Island 50 1,545 sq mi 4,002 sq km
District of Columbia 51 68.25 sq mi 176.75 sq km'''

# State name -> area in whole square miles.
area = {}
for record in area_string.splitlines():
    # With the unit labels removed, the last three tokens are
    # rank, sq-mi area and sq-km area; everything before is the name.
    tokens = record.replace('sq mi', '').replace('sq km', '').split()
    if not tokens:
        continue  # tolerate blank lines in the pasted table
    # int(float(...)) truncates fractional areas such as "68.25".
    area[' '.join(tokens[:-3])] = int(float(tokens[-2].replace(',', '')))


# In[4]:


df = pd.read_csv('./../data/breweries.csv', index_col=0)

us_df = df[df['country'] == 'United States'].copy(deep=True)

# Manual corrections of the 'state' column, keyed by row label.
# Row 471 is fixed *before* the null filter so that it survives it;
# the remaining rows are fixed afterwards. Statement order is significant.
us_df.loc[471, 'state'] = 'Florida'
us_df = us_df[~us_df['state'].isnull()]
us_df.loc[1393, 'state'] = 'Maine'
us_df.loc[1397, 'state'] = 'Kansas'
us_df.loc[1398, 'state'] = 'Illinois'
us_df.loc[1399, 'state'] = 'New Jersey'
us_df.loc[1402, 'state'] = 'New York'
us_df.loc[1404, 'state'] = 'Missouri'
us_df.loc[[1407, 1413], 'state'] = 'North Carolina'
us_df.loc[[1409], 'state'] = 'Ohio'
us_df.loc[[1410], 'state'] = 'Wisconsin'
us_df.loc[[1411], 'state'] = 'Massachusetts'
us_df.loc[[1416], 'state'] = 'Michigan'
us_df.loc[[1417, 1418], 'state'] = 'Oregon'
us_df.loc[[1420], 'state'] = 'California'
us_df.loc[[1421], 'state'] = 'District of Columbia'

# Rows labelled Virgin Islands or District of Columbia are excluded
# from the per-state statistics.
us_df = us_df[us_df['state'] != 'Virgin Islands']
us_df = us_df[us_df['state'] != 'District of Columbia']

# Count breweries per state in one pass instead of re-filtering per state.
brewery_counts = us_df['state'].value_counts()
states = {
    name: {
        'area': area[name],
        'population': population[name],
        'count': int(brewery_counts[name]),
    }
    for name in sorted(brewery_counts.index)
}

# One row per state with columns area / population / count.
states_df = pd.DataFrame(states).T
states_df['per_capita'] = states_df['count'] / states_df['population']
states_df['per_sqmiles'] = states_df['count'] / states_df['area']
states_df['states'] = states_df.index
states_df = states_df.sort_values('count', ascending=False)


# In[5]:


states_df.head()


# In[6]:


northeast = ['New Jersey', 'New York', 'Pennsylvania', 'Connecticut',
             'Maine', 'Massachusetts', 'New Hampshire', 'Rhode Island',
             'Vermont']
midwest = ['Illinois', 'Indiana', 'Michigan', 'Ohio', 'Wisconsin', 'Iowa',
           'Kansas', 'Minnesota', 'Missouri', 'Nebraska', 'North Dakota',
           'South Dakota']
south = ['Delaware', 'Florida', 'Georgia', 'Maryland', 'North Carolina',
         'South Carolina', 'Virginia', 'District of Columbia',
         'West Virginia', 'Alabama', 'Kentucky', 'Mississippi', 'Tennessee',
         'Arkansas', 'Louisiana', 'Oklahoma', 'Texas']
west = ['Arizona', 'Colorado', 'Idaho', 'Montana', 'Nevada', 'New Mexico',
        'Utah', 'Wyoming', 'Alaska', 'California', 'Hawaii', 'Oregon',
        'Washington']


# In[7]:


# State name -> region label, built once from the four lists above.
_REGION_BY_STATE = {}
for _label, _members in (('northeast', northeast), ('midwest', midwest),
                         ('south', south), ('west', west)):
    for _state in _members:
        # setdefault keeps the first region if a state were listed twice,
        # matching the order in which the lists are checked.
        _REGION_BY_STATE.setdefault(_state, _label)


def f(v):
    """Return the region label for state name *v*, or 'other' if unlisted."""
    return _REGION_BY_STATE.get(v, 'other')


states_df['region'] = states_df['states'].apply(f)


# In[8]:


# Pre-formatted, thousands-separated strings for the hover tooltips.
states_df['population_format'] = states_df['population'].apply("{:,}".format)
states_df['area_format'] = states_df['area'].apply("{:,}".format)
states_df['x'] = range(0, len(states_df.index))
# Rect glyphs are positioned by their centre, so a bar of height `count`
# that starts at zero is centred at count / 2.
states_df['count_by_2'] = states_df['count'] / 2
states_df['color'] = '#1F77B4'


# In[9]:


import bokeh
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    Circle,
    CategoricalColorMapper,
    LinearInterpolator,
    Row,
    CategoricalTickFormatter,
    CustomJS,
    Rect,
)
from bokeh.plotting import figure
from bokeh.io import output_notebook, show

output_notebook()


# ### Number of breweries and per capita vs per sq. mile breweries

# In[10]:


SCATTER_TOOLTIP = """
[@index]
Population: @population_format
Area(sq mi): @area_format
"""

BAR_TOOLTIP = """
[@states]
Count: @count
"""

# Shared hover callback: reset every row of the 'color' column to the default
# colour, then paint the hovered rows with the highlight colour.
# NOTE(review): `cb_data.index['1d'].indices` and `source.trigger('change')`
# look like the legacy BokehJS spellings; newer releases presumably expect
# `cb_data.index.indices` and `source.change.emit()` - confirm against the
# Bokeh version this notebook is pinned to.
HIGHLIGHT_HOVERED_JS = """
    for (var i = 0; i < source.data.color.length; i++) {
        source.data.color[i] = '#1F77B4';
    }
    var indices = cb_data.index['1d'].indices;
    for (var j = 0; j < indices.length; j++) {
        var ind0 = indices[j];
        source.data.color[ind0] = '#b4531f';
    }
    source.trigger('change')
"""


def build_state_figures(source, fill_color, radius_mapper, state_names):
    """Build the per-capita/per-area scatter plot and the brewery-count bars.

    Parameters
    ----------
    source : ColumnDataSource
        Data source shared by both figures.
    fill_color : str or dict
        Fill colour spec applied to both the circles and the bars
        (a column name or a ``{'field': ..., 'transform': ...}`` spec).
    radius_mapper : LinearInterpolator
        Transform mapping the 'population' column to a screen radius.
    state_names : list of str
        Categorical x-range (and its bounds) for the bar chart.

    Returns
    -------
    tuple
        ``(scatter_fig, bar_fig, scatter_hover, bar_hover)``. Both hover
        tools are already added to their figures.
    """
    scatter_hover = HoverTool(tooltips=SCATTER_TOOLTIP)
    scatter_fig = figure(width=475, height=400,
                         tools='pan,box_select,box_zoom,reset')
    scatter_fig.x_range.bounds = (0, 3e-5)
    scatter_fig.x_range.end = 3e-5
    scatter_fig.y_range.bounds = (0, 3e-3)
    scatter_fig.y_range.end = 3e-3
    scatter_fig.add_glyph(source, Circle(
        x='per_capita',
        y='per_sqmiles',
        radius_units='screen',
        fill_color=fill_color,
        radius={'field': 'population', 'transform': radius_mapper},
    ))
    scatter_fig.xaxis.axis_label = 'Breweries Per Capita'
    scatter_fig.yaxis.axis_label = 'Breweries Per Sq Mile'

    bar_hover = HoverTool(tooltips=BAR_TOOLTIP)
    bar_fig = figure(width=475, height=400, x_range=state_names,
                     tools='pan,box_select,box_zoom,reset')
    bar_fig.y_range.bounds = (0, 150)
    bar_fig.y_range.start = 0
    bar_fig.xaxis.major_label_orientation = 3.14 / 3
    bar_fig.x_range.bounds = state_names
    bar_fig.add_glyph(source, Rect(x='states', y='count_by_2', width=1,
                                   height='count', fill_color=fill_color))
    bar_fig.yaxis.axis_label = 'Number of breweries'

    scatter_fig.add_tools(scatter_hover)
    bar_fig.add_tools(bar_hover)
    return scatter_fig, bar_fig, scatter_hover, bar_hover


source = ColumnDataSource(data=states_df)
size_mapper = LinearInterpolator(
    x=[states_df['population'].min(), states_df['population'].max()],
    y=[2, 10],
)

# Both glyphs read their colour from the 'color' column, which the hover
# callbacks rewrite, so hovering in one figure highlights the other as well.
p1, p2, hover1, hover2 = build_state_figures(
    source, 'color', size_mapper, list(states_df.index))

callback1 = CustomJS(args=dict(source=source, hover=hover2),
                     code=HIGHLIGHT_HOVERED_JS)
callback2 = CustomJS(args=dict(source=source, hover=hover1),
                     code=HIGHLIGHT_HOVERED_JS)
hover1.callback = callback1
hover2.callback = callback2

p = Row(p2, p1)
show(p)


# #### Breweries by region

# In[11]:


import geopandas as gpd
import numpy as np


def getXYCoords(geometry, coord_type):
    """Return the 'x' or 'y' coordinates of a geometry's coordinate sequence.

    Used with LineString and Polygon (exterior ring) geometries.
    Returns None if *coord_type* is neither 'x' nor 'y'.
    """
    if coord_type == 'x':
        return geometry.coords.xy[0]
    elif coord_type == 'y':
        return geometry.coords.xy[1]


def getPolyCoords(geometry, coord_type):
    """Return the 'x' or 'y' coordinates of a Polygon's exterior ring."""
    return getXYCoords(geometry.exterior, coord_type)


def getLineCoords(geometry, coord_type):
    """Return the 'x' or 'y' coordinates of a LineString."""
    return getXYCoords(geometry, coord_type)


def getPointCoords(geometry, coord_type):
    """Return the 'x' or 'y' coordinate of a Point.

    Returns None if *coord_type* is neither 'x' nor 'y'.
    """
    if coord_type == 'x':
        return geometry.x
    elif coord_type == 'y':
        return geometry.y


# Multi-geometry type name -> extractor for one of its parts.
_PART_EXTRACTORS = {
    "MultiPoint": getPointCoords,
    "MultiLineString": getLineCoords,
    "MultiPolygon": getPolyCoords,
}


def multiGeomHandler(multi_geometry, coord_type, geom_type):
    """Flatten a multi-geometry into one NaN-separated coordinate array.

    Handles MultiPoint, MultiLineString and MultiPolygon. The coordinates
    of every part are merged into a single array, each part followed by
    ``np.nan``, which is how Bokeh wants individual geometries separated.

    Bokeh documentation regarding the multi-geometry issues can be found
    here (it is an open issue): https://github.com/bokeh/bokeh/issues/2321

    Parameters
    ----------
    multi_geometry
        Geometry exposing its parts through ``.geoms`` (or, failing that,
        through direct iteration).
    coord_type : str
        'x' or 'y'.
    geom_type : str
        One of 'MultiPoint', 'MultiLineString', 'MultiPolygon'.

    Returns
    -------
    numpy.ndarray
        Concatenated coordinates; empty if the geometry has no parts.

    Raises
    ------
    ValueError
        If *geom_type* is not one of the supported multi-geometry types.
    """
    try:
        extract = _PART_EXTRACTORS[geom_type]
    except KeyError:
        raise ValueError(
            "Unsupported geometry type: {!r}".format(geom_type)) from None

    # Shapely 2.x multi-part geometries are no longer iterable themselves;
    # `.geoms` works on both old and new releases.
    parts = getattr(multi_geometry, 'geoms', multi_geometry)
    pieces = [np.append(extract(part, coord_type), np.nan) for part in parts]
    if not pieces:
        return np.array([])
    return np.concatenate(pieces)


def getCoords(row, geom_col, coord_type):
    """Return the 'x' or 'y' coordinates of the geometry stored in a row.

    Parameters
    ----------
    row
        Mapping-like row; the geometry is read from ``row[geom_col]``.
    geom_col : str
        Name of the geometry column.
    coord_type : str
        'x' or 'y'.

    Returns
    -------
    A single coordinate for a Point, otherwise a list of coordinates.
    Any geometry type other than Point, LineString or Polygon is handed
    to ``multiGeomHandler``.
    """
    geom = row[geom_col]
    gtype = geom.geom_type

    # "Normal" geometries
    if gtype == "Point":
        return getPointCoords(geom, coord_type)
    elif gtype == "LineString":
        return list(getLineCoords(geom, coord_type))
    elif gtype == "Polygon":
        return list(getPolyCoords(geom, coord_type))

    # Multi geometries
    return list(multiGeomHandler(geom, coord_type, gtype))


data = gpd.read_file('../data/states_21basic/states.shp')
data['geom_x'] = data.apply(getCoords, geom_col="geometry", coord_type="x", axis=1)
data['geom_y'] = data.apply(getCoords, geom_col="geometry", coord_type="y", axis=1)
# The geometry column is dropped before the frame is handed to
# ColumnDataSource; only the plain coordinate lists are kept.
data = data.drop('geometry', axis=1)

plot_df = pd.merge(data, states_df, left_on='STATE_NAME', right_index=True)
dfsource = ColumnDataSource(data=plot_df)


# In[12]:


WIDTH = 900
TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

p = figure(width=int(WIDTH), height=int(WIDTH/1.5), title="", tools=TOOLS,
           x_axis_location=None, y_axis_location=None)

palette = ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c']
color_mapper = CategoricalColorMapper(
    factors=list(states_df['region'].unique()), palette=palette)

patches = p.patches('geom_x', 'geom_y', source=dfsource, name='Name',
                    fill_color={'field': 'region', 'transform': color_mapper},
                    line_color="white", line_width=0.5)

hover = HoverTool(renderers=[patches])
hover.tooltips = [("Name", "@states"), ("Count", "@count")]
p.add_tools(hover)
show(p)


# In[13]:


source = ColumnDataSource(data=states_df)

# Same two figures as above, coloured by region and without the
# hover-highlight callbacks.
p1, p2, hover1, hover2 = build_state_figures(
    source,
    {'field': 'region', 'transform': color_mapper},
    size_mapper,
    list(states_df.index),
)

p = Row(p2, p1)
show(p)


# ##### States with the most breweries

# In[14]:


pd.DataFrame(states_df.sort_values('count', ascending=False).head(5)['count'])


# ###### States with highest breweries per capita

# In[15]:


pd.DataFrame(states_df.sort_values('per_capita', ascending=False).head(5)['per_capita'])


# ###### States with highest breweries per sq. mile

# In[16]:


pd.DataFrame(states_df.sort_values('per_sqmiles', ascending=False).head(5)['per_sqmiles'])


# In[ ]: