#!/usr/bin/env python
# coding: utf-8

# # Analysis of Seattle PD bicycle-related Field Contacts
#
# ##### Ethan C. Campbell, for Central Seattle Greenways / Helmet Law Working Group
#
# For questions, contact me at ethanchenbell@gmail.com.

# #### Import packages and set file system

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
# NOTE: the star import is relied on by later cells for bare numpy names
# (e.g. array, cumsum, tile); it also replaces the builtin sum with numpy's sum.
from numpy import *
import pandas as pd
pd.set_option('display.max_columns',100)
pd.set_option('display.max_colwidth',50)
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mtick
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300    # turn on for higher-quality figure export
from datetime import datetime, timedelta
import platform
import warnings
import sys
# public import path; importing display from IPython.core.display is deprecated
from IPython.display import display, HTML
# display(HTML(""))

# choose root directory for data files
system_name = platform.system()
if system_name == 'Darwin':
    data_dir = '/Users/Ethan/Documents/Finances and records/2020-06-30 - Helmet Law Working Group/Data/'
    results_dir = '/Users/Ethan/Documents/Finances and records/2020-06-30 - Helmet Law Working Group/Figures/'
elif system_name == 'Linux':
    data_dir = '/dat1/ethancc/CSG/'
    results_dir = data_dir
else:
    # fail with a clear message instead of a NameError on the first use of results_dir
    raise RuntimeError('No data directory is configured for platform {0!r}; '
                       'set data_dir and results_dir for this system.'.format(system_name))

# set directory paths
current_results_dir = results_dir + '2021-12-03 - Seattle PD Field Contact analyses/'


# #### Load compiled bike citation records
#
# Note: file created previously in Jupyter notebook [***csg_compile_king_county_bike_citations.ipynb***](https://github.com/ethan-campbell/Miscellaneous/blob/master/csg_compile_king_county_bike_citations.ipynb), which is available on my GitHub.
# In[12]:


# Compiled King County citation records; the spreadsheet's leftover
# 'Unnamed: 0' column is discarded, then Seattle PD rows are selected
citations_path = data_dir + '2021-10-18 - compiled King County bike citation records.xlsx'
kc_citations = pd.read_excel(citations_path)
kc_citations = kc_citations.drop(columns=['Unnamed: 0'])
issued_by_spd = kc_citations['Law Enforcement Agency'] == 'Seattle Police Department'
spd_citations = kc_citations[issued_by_spd]
spd_citations.head(3)


# #### Load compiled King County jurisdiction citation statistics
#
# Note: file created previously in Jupyter notebook [***csg_analyze_king_county_bike_citations.ipynb***](https://github.com/ethan-campbell/Miscellaneous/blob/master/csg_analyze_king_county_bike_citations.ipynb), which is available on my GitHub.

# In[17]:


# Per-jurisdiction helmet citation statistics, indexed by jurisdiction name
city_stats_path = data_dir + '2021-10-18 - King County helmet citation statistics.xlsx'
cities = pd.read_excel(city_stats_path, index_col='Jurisdiction')
cities.loc['Seattle']


# #### Load summaries of Seattle PD bicycle-related Field Contact narrative reports
#
# Note: records span May 7, 2019 to March 26, 2021; some narrative reports were redacted prior to release to public.

# In[21]:


# Summaries of the SPD Field Contact narrative reports, indexed by report number
summaries_path = data_dir + '2021-12-03 - Seattle PD Field Contact Report summaries - Installments 2-3.xlsx'
spd_fc_summaries = pd.read_excel(summaries_path, index_col='Report Number')
spd_fc_summaries.head(3)


# #### Load Seattle PD bicycle-related Field Contact fields from Records Management System (RMS) data
#
# Note: records span May 7, 2019 to March 26, 2021, after Seattle PD began using Mark43 exclusively as their RMS.
# In[402]:


# Read every bike-related Field Contact entry exported from the RMS, indexed by report number
rms_filename = ('2021-06-16 - Seattle PD Records Management System (RMS) Field Contacts '
                '(2019-05-07 to 2021-03-26) - all bike-related.xlsx')
spd_fc_data = pd.read_excel(data_dir + rms_filename, index_col='Report Number')

# Remove columns holding identifying information before anything is displayed
identifying_columns = [
    'Report Subtype',
    'FCC Subject 1',
    'FCC Subject 1 DOB',
    'FCC Subject 1 Home Address Street Address',
    'FCC Subject 1 Home Address Lat ',
    'FCC Subject 1 Home Address Long',
    'License Plate / Registration #',
    'VIN #',
]
spd_fc_data.drop(columns=identifying_columns, inplace=True)
display(spd_fc_data.head(3))

# RMS rows for the summarized reports, keeping only the first row per report number
rms_rows_for_summaries = spd_fc_data.loc[spd_fc_summaries.index]
first_rms_rows = rms_rows_for_summaries[~rms_rows_for_summaries.index.duplicated()]

# Attach subject involvement type and contact date to the narrative summaries
subject_types = first_rms_rows['FCC Subject 1 Involvement Type']
spd_fc_summaries['Subject Types'] = subject_types
fc_dates = first_rms_rows['Date']
spd_fc_summaries['Date'] = fc_dates


# #### Field Contact narrative reports: summary statistics

# In[370]:


n_reports = len(spd_fc_summaries)
fc_type = spd_fc_summaries['FCC Type (redundant)']


def _percent_matching(column, pattern):
    """Return the percentage of reviewed reports whose `column` text contains `pattern`."""
    matches = spd_fc_summaries[column].str.contains(pattern)
    return 100 * sum(matches) / n_reports


print('\nNumber of reports reviewed:', n_reports)

fc_type_counts = fc_type.value_counts()
print('\nBreakdown of types of Field Contacts:')
display(fc_type_counts)
print('\nBreakdown of types of Field Contacts (%):')
display(100 * fc_type_counts / n_reports)

print('\nFraction of Field Contacts involving a homeless subject, either noted by officer or inferred from clear narrative elements: {0:.1f}%'
      .format(_percent_matching('Homelessness', 'Yes')))
print('\nFraction of Field Contacts in which officer(s) were on mounted bike patrol: {0:.1f}%'
      .format(_percent_matching('Officer on Bike Patrol', 'Yes')))
print('\nFraction of Field Contacts originating from a bike helmet violation: {0:.1f}%'
      .format(_percent_matching('Type of Bike Violation', 'Helmet violation')))

# Warrant checks: share of reports with a check, then results relative to the number of checks
n_warrant_checks = sum(spd_fc_summaries['Warrant Check'].str.contains('Yes'))
print('\nFraction of Field Contacts in which a warrant check was conducted: {0:.1f}%'
      .format(100 * n_warrant_checks / n_reports))
warrant_result_counts = spd_fc_summaries['Warrant Check Result'].value_counts()
print('\nResults of warrant checks (yes indicates an open warrant was identified):')
display(warrant_result_counts)
print('\nResults of warrant checks (yes indicates an open warrant was identified) (%):')
display(100 * warrant_result_counts / n_warrant_checks)

outcome_counts = spd_fc_summaries['Outcome'].value_counts()
print('\nOutcomes of Field Contacts:')
display(outcome_counts)
print('\nOutcomes of Field Contacts (%):')
display(100 * outcome_counts / n_reports)

print('\nSuspicion articulated for Terry Stops:')
is_terry_report = fc_type == 'Terry Stop'
print(spd_fc_summaries.loc[is_terry_report, 'Reason for Suspicion/Other Circumstances'].values)

# Hard-coded count of Terry Stops whose only original rationale was a bike violation
# (presumably tallied from the reasons printed above -- TODO confirm)
n_sole_bike_rationale = 2
n_terry_reports = fc_type_counts['Terry Stop']
print('\nFraction of Terry Stops in which a bike violation (e.g., helmet violation) was the sole rationale originally articulated for the stop; \n'
      + 'i.e., cases in which suspicion developed after the stop was initiated: {0:.0f}% ({1} of {2})'
        .format(100 * n_sole_bike_rationale / n_terry_reports, n_sole_bike_rationale, n_terry_reports))
print('\nNOTE: no suspicion was articulated in narrative reports for the remainder of Field Contacts that were not Terry Stops')


# In[382]:


# subset Field Contacts into (1) Terry Stops, and (2) 'Bike/infraction warning-no cite'
is_terry_stop = spd_fc_data['FCC Type'] == 'Terry Stop'
is_bike_warning = spd_fc_data['FCC Type'] == 'Bike/infraction warning-no cite'
spd_fc_terry_stops = spd_fc_data[is_terry_stop]
spd_fc_bike_warning = spd_fc_data[is_bike_warning]
spd_fc_combined = spd_fc_data[is_terry_stop | is_bike_warning]

# summarize
# Date range covered by the Field Contact records; a month is taken as 30.5 days
first_record_date = datetime(2019,5,7)
last_record_date = datetime(2021,3,26)
data_avail_in_months = (last_record_date - first_record_date) / timedelta(days=30.5)
print('\nTime period of Field Contact data availability: {0:.1f} months, from {1} to {2}'
      .format(data_avail_in_months, first_record_date.date(), last_record_date.date()))

print('\nSearch keywords used for public records request (plus "Biker" and "Bicycler", which returned no Field Contacts during this period):')
display(spd_fc_data['Keyword'].value_counts())

n_terry_stops = len(spd_fc_terry_stops)
n_bike_warnings = len(spd_fc_bike_warning)
n_combined = len(spd_fc_combined)
print('\nFrom here on, we are specifically analyzing two largest types of Field Contacts for which subjects were reliably bicyclists\n in the narrative reports reviewed: \n'
      + '{0} Terry Stops and {1} "Bike/infraction warning-no cite" entries (ignoring less common categories - \n'
        .format(n_terry_stops, n_bike_warnings)
      + ' campsite clearances, other infractions, tresspass warnings, park violations, pedestrian violations/warnings, citizen tips, etc.)')

# Bike-warning entries whose subject was recorded as a suspect / suspicious.
# The mask is built once and reused for both the fraction and the subsets below;
# isin() does not raise when one of the two labels never occurs in the data.
susp_mask = spd_fc_bike_warning['FCC Subject 1 Involvement Type'].isin(['suspect','Suspicious'])
susp_frac = susp_mask.sum() / n_bike_warnings
print('\nFraction of bike infraction warning entries involving suspicion of subject unrelated to infraction (i.e., pretextual stops):\n {0:.1f}%'
      .format(100 * susp_frac))

# Annualize counts: 12 * count / (months of data). The 2/7 factor is the hard-coded
# "2-in-7 statistic" referred to in the printed text.
print('\nAnnual rate of police contacts based on these records:\n'
      + '- Terry Stops only: {0:.0f} per year (some may not be stops of bicyclists, but most likely are)\n'
        .format(12 * n_terry_stops / data_avail_in_months)
      + '- Bike infraction warnings only: {0:.0f} per year\n'
        .format(12 * n_bike_warnings / data_avail_in_months)
      + '- Both combined: {0:.0f} per year\n'
        .format(12 * n_combined / data_avail_in_months)
      + '- ** Estimated Terry Stops that develop solely from a bike infraction, based on 2-in-7 statistic from above: {0:.0f} per year\n'
        .format((2/7) * 12 * n_terry_stops / data_avail_in_months)
      + '- ** Bike infraction warnings involving suspicion (pretextual stops): {0:.0f} per year\n'
        .format(susp_frac * 12 * n_bike_warnings / data_avail_in_months)
      + '\n ** = suspicion-related stops enabled by existing bicycle infractions')

# Mean number of SPD bike citations per year over 2015-2019 (inclusive)
first_citation_year, last_citation_year = 2015, 2019
citation_year = spd_citations['Violation Date'].dt.year
in_citation_window = (citation_year >= first_citation_year) & (citation_year <= last_citation_year)
annual_citation_rate = in_citation_window.sum() / (last_citation_year - first_citation_year + 1)
print('\nCompare sum of two final annual rates to annual rate of bike-related citations (to bicyclists) based on Seattle Municipal Court records:\n'
      + '- {0:.0f} per year'.format(annual_citation_rate))

# na=False counts entries with a missing disposition as non-matches
n_arrests = spd_fc_terry_stops['FCC Disposition'].str.contains('Arrest', case=False, na=False).sum()
print('\nFrequency of arrests resulting from Terry stops of bicyclists: {0:.1f}%'
      .format(100 * n_arrests / n_terry_stops))


def _pct_homeless_flagged(subset):
    """Return the percentage of rows in `subset` whose 'Event Statistics' text contains 'Homelessness'."""
    flagged = subset['Event Statistics'].str.contains('Homelessness', na=False)
    return 100 * flagged.sum() / len(subset)


print('\n\n-- DEMOGRAPHICS --')
print('\n\nFraction of Field Contacts involving a homeless subject, as noted by officer in a check box RMS field:\n'
      + '- Terry Stops only: {0:.1f}%\n- Bike infraction warnings only: {1:.1f}%\n- Both combined: {2:.1f}%\n'
        .format(_pct_homeless_flagged(spd_fc_terry_stops),
                _pct_homeless_flagged(spd_fc_bike_warning),
                _pct_homeless_flagged(spd_fc_combined))
      + '\n NOTE: the details in the narrative reports reviewed above indicate these percentages are likely an undercount')

spd_fc_bike_warning_susp = spd_fc_bike_warning[susp_mask]
spd_fc_bike_warning_non_susp = spd_fc_bike_warning[~susp_mask]


def ethnicity_disp(input_data):
    """Display and return the ethnicity breakdown (%) of subjects recorded as White.

    Rows of `input_data` with 'FCC Subject 1 Race' equal to 'White' are tallied by
    'FCC Subject 1 Ethnicity'; the 'Unknown' ethnicity is excluded (if present)
    before the remaining counts are normalized to percentages.

    Returns the percentage Series that was displayed.
    """
    is_white = input_data['FCC Subject 1 Race'] == 'White'
    ethnicity_counts = input_data[is_white]['FCC Subject 1 Ethnicity'].value_counts()
    # errors='ignore': a subset with no 'Unknown' ethnicity must not raise KeyError
    ethnicity_counts = ethnicity_counts.drop('Unknown', errors='ignore')
    print()
    eth_summary = 100 * ethnicity_counts / ethnicity_counts.sum()
    display(eth_summary)
    return eth_summary


def _summarize_race(header, subset):
    """Print `header` (formatted with the number of subjects with a recorded race),
    display the race breakdown (%) of `subset`, and return (race %, ethnicity %)."""
    race = subset['FCC Subject 1 Race']
    print(header.format(race.value_counts().sum()))
    race_pct = 100 * race.value_counts(normalize=True)
    display(race_pct)
    return race_pct, ethnicity_disp(subset)


demos_spd_fc_bike_warning, eth_spd_fc_bike_warning = _summarize_race(
    '\n\nAll recorded bike infraction stops resulting in warnings (n = {0}):',
    spd_fc_bike_warning)
demos_spd_fc_bike_warning_susp, eth_spd_fc_bike_warning_susp = _summarize_race(
    '\n\nPretextual stops (bike infraction stops resulting in warnings in which suspicion was a factor in the stop) (n = {0}):',
    spd_fc_bike_warning_susp)
demos_spd_fc_terry_stops, eth_spd_fc_terry_stops = _summarize_race(
    '\n\nTerry stops of bicyclists (n = {0}):',
    spd_fc_terry_stops)


# In[242]:


def _white_by_ethnicity(race_pct, eth_pct):
    """Split the White share of `race_pct` into [non-Hispanic, Hispanic] shares using `eth_pct` (in %)."""
    white = race_pct['White']
    return [white * eth_pct['Not Hispanic Or Latino']/100,
            white * eth_pct['Hispanic Or Latino']/100]


def _draw_demographics_bar(y_baseline, widths, colors, bump_label, title, n_text_x, n_subjects,
                           bump_label_v=None, label_star=None):
    """Draw one horizontal stacked bar on the current axes and return its bar container.

    y_baseline   -- y position of the bar
    widths       -- segment widths (percentages), left to right; the second segment is hatched
    colors       -- one color per segment
    bump_label   -- per-segment horizontal offset applied to the percentage label
    title        -- text placed above the bar
    n_text_x     -- x position of the '(n = ...)' annotation
    n_subjects   -- value shown in the '(n = ...)' annotation
    bump_label_v -- optional per-segment vertical label offset (default 0; if needed, drop by -0.17)
    label_star   -- optional per-segment number of '*' superscripts appended to the label (default 0)
    """
    widths = array(widths)
    if bump_label_v is None:
        bump_label_v = [0] * len(widths)
    if label_star is None:
        label_star = [0] * len(widths)
    # each segment starts where the previous ones end
    lefts = array([0, *cumsum(widths)[:-1]])
    bars = plt.barh(y_baseline, widths, height=0.5, left=lefts,
                    color=colors, edgecolor='w', linewidth=2.0)
    bars[1].set_hatch('/////')    # for Hispanic/Latino category
    for w_idx, w in enumerate(widths):
        plt.text(lefts[w_idx] + w/2 + bump_label[w_idx], y_baseline - 0.4 + bump_label_v[w_idx],
                 '{0:.1f}%{1}'.format(w, '$^*$'*label_star[w_idx]),
                 horizontalalignment='center', fontstyle='italic', color='0.3', fontsize=9)
    plt.text(2, y_baseline + 0.3, title)
    plt.text(n_text_x, y_baseline + 0.34, '(n = {0})'.format(n_subjects),
             style='italic', color='0.3', verticalalignment='center', fontsize=9)
    return bars


plt.figure(figsize=(10,5.5), facecolor='w')

# Bar 1 segments: White, Hispanic/Latino, Asian/Pacific Islander, Black, Other/Unknown
h1 = _draw_demographics_bar(
    4.2,
    [*_white_by_ethnicity(demos_spd_fc_bike_warning, eth_spd_fc_bike_warning),
     demos_spd_fc_bike_warning['Asian'],
     demos_spd_fc_bike_warning['Black or African American'],
     demos_spd_fc_bike_warning['Unknown']],
    colors=['cornflowerblue','cornflowerblue','indianred','olivedrab','0.7'],
    bump_label=[0,0,0,-0.4,0],
    title=r'$\bf{Recorded~bicycle~infraction~stops~resulting~in~warnings}$ (Seattle PD, 2019-2021)',
    n_text_x=86,
    n_subjects=spd_fc_bike_warning['FCC Subject 1 Race'].value_counts().sum())

# Bar 2 segments: White, Hispanic/Latino, Black, Other/Unknown
y_pretextual = 3.0
h2 = _draw_demographics_bar(
    y_pretextual,
    [*_white_by_ethnicity(demos_spd_fc_bike_warning_susp, eth_spd_fc_bike_warning_susp),
     demos_spd_fc_bike_warning_susp['Black or African American'],
     demos_spd_fc_bike_warning_susp['Unknown']],
    colors=['cornflowerblue','cornflowerblue','olivedrab','0.7'],
    bump_label=[0,0,-0.4,0],
    title=r'$\bf{Recorded~pretextual~stops~of~cyclists}$$^\ddag$ (Seattle PD, 2019-2021)',
    n_text_x=68,
    n_subjects=spd_fc_bike_warning_susp['FCC Subject 1 Race'].value_counts().sum())
# raw string: '\d' is not a valid escape sequence in a normal string literal
plt.text(100, y_pretextual - 0.67,
         r'$^\ddag$These represent bicycle infraction stops resulting in warnings in which '
         + 'some unrelated suspicion was recorded as a factor in the stop',
         fontstyle='italic', horizontalalignment='right', color='0.5', fontsize=7)

# Bar 3 segments double as the legend entries (h3 and labels are used by the legend call below)
labels = ['White','Hispanic/Latino','Asian/Pacific Islander','Black',
          'Native American/Alaska Native','Other/Unknown']
y_terry = 1.6
h3 = _draw_demographics_bar(
    y_terry,
    [*_white_by_ethnicity(demos_spd_fc_terry_stops, eth_spd_fc_terry_stops),
     demos_spd_fc_terry_stops['Asian'] + demos_spd_fc_terry_stops['Native Hawaiian or Other Pacific Islander'],
     demos_spd_fc_terry_stops['Black or African American'],
     demos_spd_fc_terry_stops['American Indian or Alaska Native'],
     demos_spd_fc_terry_stops['Unknown']],
    colors=['cornflowerblue','cornflowerblue','indianred','olivedrab','gold','0.7'],
    bump_label=[0,0,0,-0.4,0.0,1.2],
    title=r'$\bf{Inferred~Terry~stops~of~cyclists}$$^\S$ (Seattle PD, 2019-2021)',
    n_text_x=62,
    n_subjects=spd_fc_terry_stops['FCC Subject 1 Race'].value_counts().sum())
# raw string for the piece containing '\S'; the piece containing '\n' stays a normal literal
plt.text(100, y_terry - 0.80,
         r'$^\S$These were identified as containing one of the keywords "bicycle," "bike," "bicyclist," or "cyclist" '
         + 'in the associated Field Contact report;\na small fraction (< ~15%) may represent stops by '
         + 'officers on bicycle patrol or stops involving a bicycle, rather than stops of cyclists',
         fontstyle='italic', horizontalalignment='right', color='0.5', fontsize=7)
# Finish the racial demographics figure: legend from the last bar, no ticks, no frame
plt.ylim([0,4.7])
plt.legend(h3, labels, ncol=3, frameon=False, loc='lower left', prop={'size':9.5})
plt.xticks([])
plt.yticks([])
for side in ('top', 'bottom', 'left', 'right'):
    plt.gca().spines[side].set_visible(False)
plt.savefig(current_results_dir + 'spd_field_contact_racial_demographics.pdf')


# In[403]:


# housing status demographics from review of 40 narrative Field Contact reports
housing_share = spd_fc_summaries['Homelessness'].value_counts(normalize=True, dropna=False)
fc_report_housing = 100 * housing_share
pct_homeless_noted = fc_report_housing['Yes']
pct_homeless_presumed = fc_report_housing['Yes (presumed)']
# ratio of (noted + presumed) to noted homelessness in the narratives
presume_homeless_ratio = (pct_homeless_noted + pct_homeless_presumed) / pct_homeless_noted

# share of combined RMS entries whose 'Event Statistics' text contains 'Homelessness',
# then scaled by the narrative-derived ratio
homeless_flag = spd_fc_combined['Event Statistics'].str.contains('Homelessness')
fc_data_homeless_recorded = 100 * sum(homeless_flag) / len(spd_fc_combined)
fc_data_homeless_inferred = fc_data_homeless_recorded * presume_homeless_ratio


def autopct(percent):
    """Format a wedge percentage as a whole number followed by '%'."""
    return '%.0f%%' % percent


plt.figure(figsize=(6,4), facecolor='w')
pie_values = [(100 - fc_data_homeless_inferred), fc_data_homeless_inferred]
# NOTE: this rebinds `labels` and `autopct` to the text objects returned by plt.pie
wedges, labels, autopct = plt.pie(pie_values,
                                  explode=tile(0.01,2),
                                  startangle=90,
                                  labels=['Not\nindicated','Homeless'],
                                  colors=['0.4','0.7'],
                                  labeldistance=1.10,
                                  autopct=autopct,
                                  pctdistance=0.75)
autopct[0].set_color('w')
for lab in labels:
    lab.set_fontsize(11)
for pct in autopct:
    pct.set_fontsize(10)
plt.title('Housing status in bicyclist Field Contact reports\n(2019-2021)')
# RE: text below... see updated version in Keynote presentation for OIG, 12/14/21
# plt.text(0.5,0.05,"Note: the 'homelessness' box was selected by officers in {0:.0f}% of $n$ = {1} reports; however,\nthe frequency of homelessness mentioned in a sample of Field Contact narratives ($n$ = {2})\nsuggests a scaling factor of ~{3:.1f}x to account for underreporting.".format(fc_data_homeless_recorded,len(spd_fc_combined),len(spd_fc_summaries),presume_homeless_ratio),
#          fontsize=7,fontstyle='italic',horizontalalignment='center',transform=plt.gcf().transFigure);
plt.tight_layout()
plt.savefig(current_results_dir + 'spd_field_contact_homelessness.pdf')


# In[404]:


# frequency distribution of all Seattle bicycle citations
spd_citations['Law Description'].value_counts(normalize=True)


# In[ ]: