#!/usr/bin/env python # coding: utf-8 # # Comparison of SDOT bike collision stats and SPD bike enforcement rates # # ##### Ethan C. Campbell, for Central Seattle Greenways / Helmet Law Working Group # # For questions, contact me at ethanchenbell@gmail.com. # #### Import packages and set file system # In[240]: get_ipython().run_line_magic('matplotlib', 'inline') from numpy import * import pandas as pd pd.set_option('display.max_columns',100) pd.set_option('display.max_colwidth',50) import matplotlib.pyplot as plt import matplotlib.dates as mdates import matplotlib.ticker as mtick import matplotlib.colors as mcolors import matplotlib.patches as mpatches import matplotlib as mpl mpl.rcParams['figure.dpi'] = 300 # turn on for higher-quality figure export from datetime import datetime, timedelta import platform import warnings import sys from IPython.core.display import display, HTML # display(HTML("")) # choose root directory for data files if platform.system() == 'Darwin': data_dir = '/Users/Ethan/Documents/Finances and records/2020-06-30 - Helmet Law Working Group/Data/' results_dir = '/Users/Ethan/Documents/Finances and records/2020-06-30 - Helmet Law Working Group/Figures/' elif platform.system() == 'Linux': data_dir = '/dat1/ethancc/CSG/' results_dir = data_dir # set directory paths current_results_dir = results_dir + '2021-12-13 - SDOT bike injury stats vs. SPD citations/' # #### Load compiled individual bike citation records (2003-2021; incomplete) # # Note: file created previously in Jupyter notebook [***csg_compile_king_county_bike_citations.ipynb***](https://github.com/ethan-campbell/Miscellaneous/blob/master/csg_compile_king_county_bike_citations.ipynb), which is available on my GitHub. # In[3]: kc_citations = pd.read_excel(data_dir + '2021-10-18 - compiled King County bike citation records.xlsx').drop(columns=['Unnamed: 0']) spd_citations = kc_citations[kc_citations['Law Enforcement Agency'] == 'Seattle Police Department'] spd_citations.head(3) # In[87]: # from individual Seattle Municipal Court records (and possibly a handful of odd KCDC entries from WA AOC records) # NOTE: these records are ≥99% complete only from 2014 onwards, except for 2016 (72% complete) cite_count_all_incomplete = spd_citations['Law Code'].groupby(spd_citations['Violation Date'].dt.year).count().drop(1962) print('All citations:') display(cite_count_all_incomplete) citations_helmet_only = spd_citations.loc[spd_citations['Law Code'] == \ 'King County Health Code 9.10.010A or 9.15.010'] cite_count_helmet_incomplete = citations_helmet_only['Law Code'].groupby(citations_helmet_only['Violation Date'].dt.year).count() print('\nHelmet citations only:') display(cite_count_helmet_incomplete) # #### Load Seattle Municipal Court summary table of bike citations (2011-2020) # # Source: transcribed from Seattle Municipal Court website annual summary table (note: much of this data has since been purged from their records and website) # In[85]: # load annual bike infraction data smc_summary_table = pd.read_excel(data_dir + '2020-07-18 - Seattle Municipal Court bike violations (transcribed from online table).xlsx', index_col=0) display(smc_summary_table) # #### Load Seattle Municipal Court historical data on bike citations, from Seattle Times (1990-2014) # # Source: *Seattle Times*, "The cop and the courier: Seattle’s top-ticketing officer, and its most-ticketed cyclist" by Gene Balk (July 15, 2015) # In[86]: from IPython.display import Image display(Image(url='https://static.seattletimes.com/wp-content/uploads/2015/07/aed058d2-2b4d-11e5-bef9-f00a1a6eacb9-1560x1133.jpg', width=500,height=300)) seattle_historical = pd.Series(index=arange(1990,2015), data=[143,149,172,125,161,157,142,179,101,141,150,152,159, 197,195,215,478,289,577,525,656,733,405,501,270]) # #### Compile single time series of citation data # In[277]: annual_citations = pd.DataFrame(index=range(1990,2021+1), data={'Total bike citations':tile(NaN,2021+1-1990), 'Total bike citations - data source':tile('',2021+1-1990), 'Helmet citations':tile(NaN,2021+1-1990), 'Helmet citations - data source':tile('',2021+1-1990)}) for year in annual_citations.index: if year in seattle_historical.index: annual_citations.loc[year,'Total bike citations'] = seattle_historical.loc[year] annual_citations.loc[year,'Total bike citations - data source'] = 'Seattle Times (1990-2014)' if year in smc_summary_table.index: tot = smc_summary_table.sum(axis=1).loc[year] hel = smc_summary_table.loc[year,'Bicycle Helmet Required (9.10.010, 9.15.010)'] if isnan(annual_citations.loc[year,'Total bike citations']) \ or tot >= annual_citations.loc[year,'Total bike citations']: annual_citations.loc[year,'Total bike citations'] = tot annual_citations.loc[year,'Total bike citations - data source'] = 'Seattle Municipal Court summary (2011-2020)' annual_citations.loc[year,'Helmet citations'] = hel annual_citations.loc[year,'Helmet citations - data source'] = 'Seattle Municipal Court summary (2011-2020)' if year in cite_count_all_incomplete.index: tot = cite_count_all_incomplete.loc[year] if isnan(annual_citations.loc[year,'Total bike citations']) \ or tot >= annual_citations.loc[year,'Total bike citations']: annual_citations.loc[year,'Total bike citations'] = tot annual_citations.loc[year,'Total bike citations - data source'] = 'Seattle Municipal Court records (2003-2021)' if year in cite_count_helmet_incomplete.index: hel = cite_count_helmet_incomplete.loc[year] if (isnan(annual_citations.loc[year,'Helmet citations']) \ or hel >= annual_citations.loc[year,'Helmet citations']): if year >= 2011: annual_citations.loc[year,'Helmet citations'] = hel annual_citations.loc[year,'Helmet citations - data source'] = 'Seattle Municipal Court records (2003-2021)' elif year < 2011: tot = cite_count_all_incomplete.loc[year] hel_ratio = hel / tot annual_citations.loc[year,'Helmet citations'] = hel_ratio * annual_citations.loc[year,'Total bike citations'] annual_citations.loc[year,'Helmet citations - data source'] = 'Estimated from total using fraction of helmet citations in Seattle Municipal Court records (2003-2011)' # insufficient data for 2021 annual_citations = annual_citations.drop(2021) display(annual_citations) # #### Load SDOT raw bike collision/injury/fatality data # # Note: collision data set was obtained from https://data.seattle.gov/dataset/Collisions/m2eh-6pdv and is current as of December 1, 2021. # # Description of data available at: https://www.seattle.gov/Documents/Departments/SDOT/GIS/Collisions_OD.pdf. # In[398]: # note: will take ~30 sec to load sdot_collisions = pd.read_csv(data_dir + '2021-12-13 - SDOT traffic collisions data.csv', parse_dates=['INCDTTM'],low_memory=False) # In[399]: sdot_collisions['INCDATE_DT'] = sdot_collisions['INCDTTM'].dt.date bike_collision_mask = sdot_collisions['SDOT_COLDESC'].str.contains('PEDALCYCLIST').astype(bool) collision_subset = sdot_collisions[['INCDATE_DT','INJURIES','SERIOUSINJURIES','FATALITIES']].loc[bike_collision_mask] collision_subset['INJURIES'] = collision_subset['INJURIES'].clip(upper=1) collision_subset['SERIOUSINJURIES'] = collision_subset['SERIOUSINJURIES'].clip(upper=1) collision_subset['FATALITIES'] = collision_subset['FATALITIES'].clip(upper=1) collision_subset.loc[collision_subset['FATALITIES'] == 1,'SERIOUSINJURIES'] = 0 collision_subset.loc[collision_subset['FATALITIES'] == 1,'INJURIES'] = 0 collision_subset.loc[collision_subset['SERIOUSINJURIES'] == 1,'INJURIES'] = 0 sdot_summary_estimated = collision_subset.groupby(collision_subset['INCDATE_DT'].astype(datetime64).dt.year).sum().drop(2003) display(sdot_summary_estimated) # #### Load SDOT-processed bike collision/injury/fatality summary stats # # Data transcribed from Table 19 ("Bicycle Collisions") in [SDOT's 2020 Traffic Report](http://www.seattle.gov/Documents/Departments/SDOT/About/DocumentLibrary/Reports/2020_Traffic_Report.pdf), which is based on Seattle PD collision reports (also see Fig. 22) # In[342]: sdot_summary = pd.read_excel(data_dir + '2021-12-13 - SDOT traffic collisions stats (Table 19).xlsx',index_col='Year') display(sdot_summary) # #### Compile single time series of collision data # In[400]: annual_collisions = pd.DataFrame(index=range(2004,2021+1), data={'Total injuries and deaths':tile(NaN,2021+1-2004), 'Total injuries and deaths - data source':tile('',2021+1-2004), 'Serious injuries and deaths':tile(NaN,2021+1-2004), 'Serious injuries and deaths - data source':tile('',2021+1-2004)}) sdot_summary_estimated_total = sdot_summary_estimated.sum(axis=1) sdot_summary_total = sdot_summary[['Possible/Evident Injury','Serious Injury','Fatal Collisions']].sum(axis=1) for year in annual_collisions.index: if year in sdot_summary_estimated_total.index: annual_collisions.loc[year,'Total injuries and deaths'] = sdot_summary_estimated_total.loc[year] annual_collisions.loc[year,'Total injuries and deaths - data source'] = 'Estimated from SDOT/SPD records (2004-2021)' annual_collisions.loc[year,'Serious injuries and deaths'] = \ sdot_summary_estimated.loc[year,'SERIOUSINJURIES'] + sdot_summary_estimated.loc[year,'FATALITIES'] annual_collisions.loc[year,'Serious injuries and deaths - data source'] = 'Estimated from SDOT/SPD records (2004-2021)' if year in sdot_summary_total.index: annual_collisions.loc[year,'Total injuries and deaths'] = sdot_summary_total.loc[year] annual_collisions.loc[year,'Total injuries and deaths - data source'] = 'SDOT 2020 Traffic Report (2009-2019)' annual_collisions.loc[year,'Serious injuries and deaths'] = \ sdot_summary.loc[year,'Serious Injury'] + sdot_summary.loc[year,'Fatal Collisions'] annual_collisions.loc[year,'Serious injuries and deaths - data source'] = 'SDOT 2020 Traffic Report (2009-2019)' display(annual_collisions) # In[402]: plt.figure(figsize=(12,5.5),facecolor='w') plt.fill_between(x=annual_citations['Total bike citations'].index, y1=annual_citations['Total bike citations'].values, color='0.3',zorder=2,label='All bicycle citations') plt.fill_between(x=annual_citations['Helmet citations'].loc[:2011].index, y1=annual_citations['Helmet citations'].loc[:2011].values, color='0.7',zorder=3,label='Helmet citations only') plt.fill_between(x=annual_citations['Helmet citations'].loc[:2011].index, y1=annual_citations['Helmet citations'].loc[:2011].values, color='none',hatch='\\\\\\',edgecolor='k',alpha=0.5,linewidth=0.0,zorder=3) plt.fill_between(x=[1,2],y1=[NaN,NaN],color='0.7',hatch='\\\\\\',edgecolor='k', alpha=0.5,linewidth=0.0,label='Extrapolated from\nincomplete records') plt.fill_between(x=annual_citations['Helmet citations'].loc[2011:].index, y1=annual_citations['Helmet citations'].loc[2011:].values, color='0.7',zorder=3) plt.ylabel('Citations per year') plt.xlabel('Year') plt.legend(loc='upper left',frameon=False) plt.grid(lw=0.5,alpha=0.5,zorder=0) plt.xlim([2004,2020]) plt.ylim([0,825]) plt.savefig(current_results_dir + 'seattle_annual_citations.pdf') # add SDOT collision stats ax2 = plt.gca().twinx() plt.fill_between(x=annual_collisions['Total injuries and deaths'].index, y1=annual_collisions['Total injuries and deaths'].values, color='wheat',alpha=0.5,zorder=2,label='Injuries of cyclists') plt.fill_between(x=annual_collisions['Serious injuries and deaths'].index, y1=annual_collisions['Serious injuries and deaths'].values, color='orange',alpha=0.5,zorder=2,label='Serious injuries and deaths') plt.ylabel('Injuries and deaths per year',color='darkgoldenrod') plt.gca().tick_params(axis='y',labelcolor='darkgoldenrod') plt.legend(loc='upper right',frameon=False) plt.ylim([0,550]) plt.savefig(current_results_dir + 'seattle_annual_citations_with_collisions.pdf') # #### Plot pie chart of Seattle bike violation types # In[219]: # frequency distribution of all Seattle bicycle citations viol_freq = 100 * spd_citations[['Law Description','Law Code']].value_counts(normalize=True) viol_freq = viol_freq.reset_index() viol_freq['Law'] = viol_freq['Law Description'].str.lstrip('Bicycle ') + ' (' + \ viol_freq['Law Code'].str.replace('Seattle Municipal Code','SMC')\ .str.replace('King County Health Code 9.10.010A or 9.15.010', 'King County Health Code 9.10/9.15') + ')' viol_freq = viol_freq[['Law',0]] display(viol_freq) # In[220]: # add up less frequent violations viol_freq.loc[16] = {'Law':'Others',0:viol_freq.loc[range(8,15+1)].sum(axis=0)[0]} viol_freq = viol_freq.drop(index=range(8,15+1)) display(viol_freq) # In[251]: def autopct(percent): return ('%.1f%%' % percent) plt.figure(figsize=(8,4),facecolor='w') wedges, labels, autopct = plt.pie(viol_freq[0], explode=tile(0.01,len(viol_freq))*arange(len(viol_freq))**1.75,startangle=0, labels=viol_freq['Law'], colors=(linspace(0.01,0.65,len(viol_freq))**0.25).astype(str), labeldistance=1.10,autopct=autopct,pctdistance=0.75) autopct[0].set_color('w') autopct[1].set_color('w') for lab_idx, lab in enumerate(labels): lab.set_fontsize((5*viol_freq[0]**0.125).iloc[lab_idx]) for pct_idx, pct in enumerate(autopct): pct.set_fontsize((5*viol_freq[0]**0.125).iloc[pct_idx]) plt.title('Bicycle-related infractions in Seattle (2003-2021)'); plt.tight_layout() plt.savefig(current_results_dir + 'seattle_bike_infractions_pie_chart.pdf') # In[ ]: