For questions, contact me at ethanchenbell@gmail.com.
For analysis of the data compiled below, refer to the companion notebook on my GitHub: *csg_analyze_king_county_bike_citations.ipynb*.
%matplotlib inline
from numpy import *
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib as mpl
# mpl.rcParams['figure.dpi'] = 300 # turn on for higher-quality figure export
from datetime import datetime, timedelta
import platform
import warnings
import sys
from IPython.core.display import display, HTML
# display(HTML("<style>.container { width:100% !important; }</style>"))
# choose root directory for data files
if platform.system() == 'Darwin':
data_dir = '/Users/Ethan/Documents/Finances and records/2020-06-30 - Helmet Law Working Group/Data/'
elif platform.system() == 'Linux':
data_dir = '/dat1/ethancc/CSG/'
# filepaths
aoc_dir = data_dir + '2021-06-22 - WA State Administrative Office of the Courts data/'
# load bike infraction data from WA State Administrative Office of the Courts
# note: "KCDC" = King County District Court
# "KCMC" / "MCs" = municipal courts in King County
kcdc_citations = pd.read_excel(aoc_dir + '210419-000051_KCDC_Bicycle_Viols - updated.xlsx',header=2,skipfooter=1)
kcmc_citations = pd.read_excel(aoc_dir + '210419-000051_KCMC_Bicycle_Viols - updated.xlsx',header=3,sheet_name='Bicycle Charges')
kcmc_lea_data = pd.read_excel(aoc_dir + '210419-000051_KCMC_Bicycle_Viols - updated.xlsx',header=2,sheet_name='LEA')
kcmc_lfo_data = pd.read_excel(aoc_dir + '210419-000051_KCMC_Bicycle_Viols - updated.xlsx',header=2,sheet_name='LFOs')
# merge in data on officers, badge numbers, and accounts receivables (AR) charges (all provided for municipal courts only)
kcmc_lea_data = kcmc_lea_data.drop(columns=['Court Name','Case Token','Case LEA Name'])
kcmc_citations = kcmc_citations.merge(kcmc_lea_data,how='left',on='Case Number')
kcmc_lfo_data = kcmc_lfo_data.drop(columns=['Court Name','Case Token'])
kcmc_citations = kcmc_citations.merge(kcmc_lfo_data,how='left',on='Case Number')
# standardize column names
kcdc_citations = kcdc_citations.rename(columns={'Case Key':'Case Key (KCDC) or Token (MCs)',
'Case Type Code & Desc':'Case Type',
'Case LEA':'Law Enforcement Agency',
'Charge Violation Date':'Violation Date',
'Charge Law Number':'Law Code',
'Charge Law':'Law Description',
'Charge Result':'Disposition',
'Charge Result Date':'Disposition Date',
'Case Participant Actor Name':'Defendant Name',
'Actor Person Gender':'Defendant Gender',
'Actor Person Race':'Defendant Race',
'Actor Person Ethnicity':'Defendant Ethnicity'})
kcmc_citations = kcmc_citations.rename(columns={'Case Token':'Case Key (KCDC) or Token (MCs)',
'Case LEA Name':'Law Enforcement Agency',
'Case Citation Charge Date':'Violation Date',
'Charge Law Number':'Law Code',
'Charge':'Law Description',
'Charge Disposition':'Disposition',
'Charge Disposition Date':'Disposition Date',
'Person Name':'Defendant Name',
'Person Gender':'Defendant Gender',
'Person Race':'Defendant Race',
'Person Ethnicity':'Defendant Ethnicity',
'Official Name':'Officer Name',
'Official Officer Badge Number':'Officer Badge Number'})
# redact identifying defendant data and unneeded columns
# NOTE: keep only Federal Way Municipal Court defendant names to allow checking for duplicate citations with separate FWMC data
kcdc_citations = kcdc_citations.drop(columns=['#Age at Viol Dt','Case Participant Type Code'])
kcmc_citations = kcmc_citations.drop(columns=['#Age at Viol Dt','Specific Participant Type Code'])
kcdc_citations['Defendant Name'] = NaN
kcmc_citations.loc[kcmc_citations['Court Name'] != 'FEDERAL WAY MUNICIPAL COURT','Defendant Name'] = NaN
# merge KCDC and MC data
kc_citations = pd.concat([kcmc_citations,kcdc_citations],ignore_index=True)
# flag all as originating from query for helmet and non-helmet bike violations
kc_citations['Originating Query'] = 'All bike violations (requested KCHC 9.10.010 and 9.15.010, RCW sections, WAC sections, and local code chapters)'
# remove one citation under King County Code 14A.40.010d (requiring motorized foot scooter users to wear a helmet)
kc_citations = kc_citations[kc_citations['Law Code'] != 'kcc14a.40.010.d']
# remove three concurrent citations for single person by North Bend Police Department, two of which are "bicycle" citations
# but the third of which is under NBMC 10.20.030 (requiring motorized foot scooter users to wear a helmet)
kc_citations = kc_citations[kc_citations['Case Number'] != '4Z0454185']
# remove one citation under Kent Municipal Code 9.44.050 (requiring motorized foot scooter users to wear a helmet)
kc_citations = kc_citations[kc_citations['Law Code'] != '9.44.050']
kc_citations['Court Name'] = kc_citations['Court Name'].str.title()
kc_citations.replace({'Court Name':{'King District Court - Kcdc':'King County District Court',
'Kcdc Auburn Courthouse':'King County District Court',
'Seatac Municipal Court':'SeaTac Municipal Court'}},inplace=True)
kc_citations.replace({'Case Type':{'IN - Infraction':'Infraction',
'CR - Criminal':'Criminal',
"Defendant's race":'Infraction',
'Limited to the defendant':'Infraction',
'Calculated by days between defendant birth date and charge violation date':'Infraction',
'defendant gender':'Infraction'}},inplace=True)
kc_citations['Law Enforcement Agency'] = kc_citations['Law Enforcement Agency'].str.title()
kc_citations.replace({'Law Enforcement Agency':{'Maple Valley Police Department':"King County Sheriff's Office / Maple Valley Police Department",
'Burien Police Department':"King County Sheriff's Office / Burien Police Department",
'Covington Police Department':"King County Sheriff's Office / Covington Police Department",
'Kenmore City Of':"King County Sheriff's Office / Kenmore Police Department",
'Woodinville Police Department':"King County Sheriff's Office / Woodinville Police Department",
'Sammamish Police Department':"King County Sheriff's Office / Sammamish Police Department",
'King Co Police Department':"King County Sheriff's Office",
'Lake Forest Park Police Dept':'Lake Forest Park Police Department',
'Port Of Seattle Police Department':'Port of Seattle Police Department',
'Seatac Police Department':"King County Sheriff's Office / SeaTac Police Department",
'Shoreline City Of':"King County Sheriff's Office / Shoreline Police Department",
'University Of Washington Police':'University Of Washington Police Department',
'Wa St Patrol':'Washington State Patrol'}},inplace=True)
kc_citations.loc[logical_and(kc_citations['Law Enforcement Agency'] == 'North Bend Police Department',
kc_citations['Violation Date'] <= datetime(2014,3,8)),
'Law Enforcement Agency'] = "King County Sheriff's Office / North Bend Police Department"
kc_citations.loc[logical_and(kc_citations['Law Enforcement Agency'] == 'North Bend Police Department',
kc_citations['Violation Date'] > datetime(2014,3,8)),
'Law Enforcement Agency'] = "Snoqualmie/North Bend Police Department"
kc_citations.loc[logical_and(kc_citations['Law Enforcement Agency'] == 'Snoqualmie Police Department',
kc_citations['Violation Date'] > datetime(2014,3,8)),
'Law Enforcement Agency'] = "Snoqualmie/North Bend Police Department"
kc_citations['Law Description'] = kc_citations['Law Description'].str.title()
kc_citations.replace({'Law Description':{'Bicycle Helmet Regulations':'Bicycle Helmet Required',
'Bicycle Helmet Violation':'Bicycle Helmet Required',
'Bicycle Helmets':'Bicycle Helmet Required',
'Fail To Wear Bicycle Helmet':'Bicycle Helmet Required',
'Helmet Required':'Bicycle Helmet Required',
'Helmet Required Riding Bicycle':'Bicycle Helmet Required',
'Helmet Required When Riding Bicyle':'Bicycle Helmet Required',
'No Bicycle Helmet':'Bicycle Helmet Required',
'No Bicyle Helmet':'Bicycle Helmet Required',
'Require Bicycle Helmet Guardian Of':'Bicycle Helmet Required (Guardian of Minor)',
'Requirement Re Bicycle Helmet':'Bicycle Helmet Required',
'Riding Bicycle W/O Helmet':'Bicycle Helmet Required',
'Riding Bicycle W/Out Helmet':'Bicycle Helmet Required',
'Riding Bicycle Without Helmet':'Bicycle Helmet Required',
'Bicycle Fail To Yield':'Bicycle Failure to Yield',
'Bicycle Leaving Curb X-Walk Spd Zn':'Bicycle Leaving Curb or Crosswalk in Speed Zone',
'No Bicycle Lights As Required':'Bicycle Lights Required',
'Bicycle Riding On Sidewalk':'Bicycle Riding on Sidewalk',
'Bicycle Riding On Sidewalks':'Bicycle Riding on Sidewalk',
'Defective Equipment-Bicycle':'Bicycle Defective Equipment',
'Bicycle-Fail To Stop At Stop Sign':'Bicycle Failure to Stop at Stop Sign',
'Bicycle-Negligence':'Bicycle Negligence',
'Bicycles Obedience To Traffic Dev':'Bicycle Obedience to Traffic Control Devices',
'Bicycles Ride On Right Side':'Riding Bicycle on Right Side',
'Bike On Sidewalk In Business Dist':'Bicycle on Sidewalk in Business District',
'Carrying Articles On Bicycle':'Carrying Articles on Bicycle',
'Fld To Give Hand Signals/Bike':'Bicycle Failure to Give Hand Signals',
'Improper Operation Of Bicycle':'Improper Operation of Bicycle',
'No Bicycles On Skate Park':'No Bicycles on Skate Park',
'Ride Bicycle On Transit Property':'Riding Bicycle on Transit Property',
'Riding Bicycle On Sidewalk/Path':'Riding Bicycle on Sidewalk or Path',
'Riding Bicycles On Right Side':'Riding Bicycle on Right Side',
'Speeding On Bicycle Trail':'Speeding on Bicycle Trail',
'Unlawful Riding On Bicycle':'Unlawful Riding on Bicycle',
'Violating Laws While Riding Bike':'Violating Laws While Riding Bicycle'}},inplace=True)
kc_citations.loc[logical_and(kc_citations['Law Code'] == '46.61.780',
kc_citations['Law Description'] == 'Bicycle Violation'),
'Law Description'] = 'Bicycle Defective Equipment'
kc_citations.replace({'Disposition':{'Dismissed W/Prejudice':'Dismissed with Prejudice',
'Dismissed W/O Prejudice':'Dismissed without Prejudice',
'Dismissed After Deferral':'Dismissed after Deferral',
'Committed After Deferral':'Committed after Deferral'}},inplace=True)
kc_citations.replace({'Defendant Race':{'White (Caucasian)':'White',
'Black (African American)':'Black',
'Asian':'Asian or Pacific Islander',
'Unknown':'Unknown/Other',
'Multiracial':'Unknown/Other'}},inplace=True)
kc_citations.replace({'Defendant Ethnicity':{'Hispanic':'Hispanic/Latino',
'Hispanic or Latino':'Hispanic/Latino',
'Non-Hispanic':'Not Hispanic/Latino',
'Not Hispanic or Latino':'Not Hispanic/Latino'}},inplace=True)
kc_citations['Officer Name'] = kc_citations['Officer Name'].str.title()
combined_officer_strings = [name for name in kc_citations['Officer Name'].value_counts().index if ' ' in name]
fixed_officer_strings = [name.split(' ')[0] for name in kc_citations['Officer Name'].value_counts().index if ' ' in name]
for ofc_idx, ofc_name in enumerate(combined_officer_strings):
kc_citations.replace({'Officer Name':{ofc_name:fixed_officer_strings[ofc_idx]}},inplace=True)
kc_citations.replace({'Law Code':{'D.10.12.010(1)':'Des Moines Municipal Code 10.12.010 or 10.12.010(1)',
'ECM1804':'[Enumclaw - no match found] "ECM 1804"',
'ECM1807':'[Enumclaw - no match found] "ECM 1807"',
'MVO.8.15.030A':'Maple Valley Municipal Code 8.15.030 or 8.15.030A',
'M8.15.030':'Maple Valley Municipal Code 8.15.030 or 8.15.030A',
'FWRC8.25.060':'Federal Way Revised Code 8.25.060',
'FWC15.196':'Federal Way Revised Code 8.25.060',
'IQM10.44.025':'Issaquah Municipal Code 10.44.025',
'SNL10.30.050':'Snoqualmie Municipal Code 10.30.050',
'K9.41.030':'Kent City Code 9.41.030',
'9.41.030':'Kent City Code 9.41.030',
'LFM9.18.020':'Lake Forest Park Municipal Code 9.18.020',
'10.54.080':'Pacific Municipal Code 10.54.080',
'P.10.54.080':'Pacific Municipal Code 10.54.080',
'R6-26(3)(A)':'Renton Municipal Code 6-26-3(A) or 6-26-7',
'R6-26-7':'Renton Municipal Code 6-26-3(A) or 6-26-7',
'10.56.185':'Auburn City Code 10.56.185',
'amc 10.56.185':'Auburn City Code 10.56.185',
'amc10.56.185':'Auburn City Code 10.56.185',
'ke9.05.450':'Kenmore Municipal Code 9.05.450 (partially adopts KCHC Title 9 by reference)',
'be11.60.090':'Bellevue City Code 11.60.090',
'bu8.37.050':'Burien Municipal Code 8.37.050 or 8.37.050.1',
'bu8.37.050.1':'Burien Municipal Code 8.37.050 or 8.37.050.1',
'KCHC9.10.010A':'King County Health Code 9.10.010A or 9.15.010',
'9.10.010.a':'King County Health Code 9.10.010A or 9.15.010',
'9.15.010':'King County Health Code 9.10.010A or 9.15.010',
'9.10.010.b':'King County Health Code 9.10.010B'}},inplace=True)
kc_citations.replace({'Law Code':{'46.61.780':'RCW 46.61.780',
'46.61.755':'RCW 46.61.755',
'46.61.770':'RCW 46.61.770',
'46.61.758':'RCW 46.61.758',
'46.61.775':'RCW 46.61.775',
'46.61.760':'RCW 46.61.760',
'46.61.235.2B':'RCW 46.61.235.2B',
'46.61.235.2b':'RCW 46.61.235.2B',
'46.61.235.5-2b':'RCW 46.61.235.2B/5',
'308.330.545':'WAC 308-330-545',
'308.330.555':'WAC 308-330-555',
'308-330-555':'WAC 308-330-555',
'KCC7.12.295.A':'King County Code 7.12.295 or 7.12.295.A',
'28.96.010.a4':'King County Code 28.96.010.A4',
'amc 10.56.110':'Auburn City Code 10.56.110',
'amc10.56.110':'Auburn City Code 10.56.110',
'10.56.110':'Auburn City Code 10.56.110',
'10.56.120':'Auburn City Code 10.56.120',
'10.56.140':'Auburn City Code 10.56.140',
'amc 10.56.140':'Auburn City Code 10.56.140',
'LFMORD798.2D':'Lake Forest Park Ordinance 798.2D',
'LFM798.2B':'Lake Forest Park Ordinance 798.2B',
'FWRC8.25.020.1':'Federal Way Revised Code 8.52.010.1',
'smc11.44.120':'Seattle Municipal Code 11.44.120',
'BDM8.28.040':'Black Diamond Municipal Code 8.28.040'}},inplace=True)
dmmc_citations = pd.read_excel(data_dir + '2020-07-21 - Des Moines Municipal Court helmet infraction data - transcribed.xlsx',
header=0)
# identify records not already present in existing data
new_indices = [idx for idx, case_num in enumerate(dmmc_citations['Case Number']) \
if case_num not in kc_citations['Case Number'].values]
dmmc_citations = dmmc_citations.loc[new_indices]
display(dmmc_citations)
# note: no new records to merge
Court Name | Case Number | Violation Date | Law Code | Law Description | Officer Name | Officer Badge Number | Defendant Race | Defendant Gender |
---|
rmc_citations = pd.read_excel(data_dir + '2020-07-22 - Renton Municipal Court helmet infraction data (2007-08-20 to 2020-06-30).xlsx',
header=4)
# redact identifying defendant data and unneeded columns
rmc_citations = rmc_citations.drop(columns=['Case Def Name'])
# identify records not already present in existing data
new_indices = [idx for idx, case_num in enumerate(rmc_citations['Case #']) \
if case_num not in kc_citations['Case Number'].values]
rmc_citations = rmc_citations.loc[new_indices]
# display(rmc_citations)
# drop columns with missing/poor/unneeded data
rmc_citations = rmc_citations.drop(columns=['Unnamed: 2','Type'])
# standardize column names
rmc_citations = rmc_citations.rename(columns={'Filed Date':'Case File Date',
'Case #':'Case Number',
'LEA':'Law Enforcement Agency',
'Official Officer Badge Number':'Officer Badge Number',
'Viol Date':'Violation Date',
'Person Gender Code':'Defendant Gender',
'Person Race Code':'Defendant Race',
'Person Ethnicity Code':'Defendant Ethnicity',
'Dispo Date':'Disposition Date',
'Dispo':'Disposition'})
# standardize data
rmc_citations.replace({'Law Enforcement Agency':{'RPD':'Renton Police Department'},
'Defendant Gender':{'M':'Male'},
'Defendant Race':{'W':'White'},
'Defendant Ethnicity':{'U':'Unknown'},
'Disposition':{'DO':'Unknown ("DO")',
'C':'Committed'}},inplace=True)
# add missing columns
rmc_citations['Court Name'] = 'Renton Municipal Court (via PDR)'
rmc_citations['Case Type'] = 'Infraction'
rmc_citations['Law Code'] = 'Renton Municipal Code 6-26-3(A) or 6-26-7'
rmc_citations['Law Description'] = 'Bicycle Helmet Required'
rmc_citations['Originating Query'] = 'Helmet violations only (requested RMC 6-26)'
# add to rest of data
kc_citations = pd.merge(kc_citations,rmc_citations,how='outer')
Note: missing cities, towns, and jurisdictions (i.e., unincorporated King County) were filled in by looking up given addresses and cross-streets on Google Maps and the Unincorporated King County Community Service Areas map prior to loading the spreadsheet below.
kcso_citations = pd.read_excel(data_dir + "2020-07-27 - King County Sheriff's Office helmet infraction data (2010-03-16 to 2019-08-22) - missing jurisdictions filled in.xlsx",
header=0)
# redact identifying defendant data and unneeded columns
kcso_citations = kcso_citations.drop(columns=['Age','Height','Weight','Eye Color','Hair Color',
'Officer Notes','Case Number',
'Primary Trafficway','Block Number','Reference Trafficway'])
# standardize column names
kcso_citations['Violation Date'] = kcso_citations['Date / Time of Incident'].dt.date
kcso_citations = kcso_citations.rename(columns={'Date / Time of Incident':'Violation Datetime',
'Ticket Number':'Case Number',
'Citation Type':'Case Type',
'Violation Code':'Law Code',
'Violation Title':'Law Description',
'Agency':'Law Enforcement Agency',
'Badge':'Officer Badge Number',
'Officer':'Officer Name',
'Sex':'Defendant Gender',
'Race':'Defendant Race'})
# split records representing multiple citations into two separate lines
kcso_citations = (kcso_citations.set_index([col_name for col_name in kcso_citations.keys() \
if col_name not in ['Law Code','Law Description']])
.apply(lambda col: col.str.split(';').explode())
.reset_index()
.reindex(kcso_citations.columns, axis=1))
# standardize data
kcso_citations.replace({'Law Enforcement Agency':{'Kenmore PD':"King County Sheriff's Office / Kenmore Police Department",
'Covington PD':"King County Sheriff's Office / Covington Police Department",
'Maple Valley PD':"King County Sheriff's Office / Maple Valley Police Department",
'Newcastle PD':"King County Sheriff's Office / Newcastle Police Department",
'Metro Transit Police':"King County Sheriff's Office / King County Metro Transit Police",
'Shoreline PD':"King County Sheriff's Office / Shoreline Police Department",
'Woodinville PD':"King County Sheriff's Office / Woodinville Police Department",
'Sammamish PD':"King County Sheriff's Office / Woodinville Police Department"},
'City':{'Vashon Island':'Unincorporated King County',
'Vashon':'Unincorporated King County',
'White Center':'Unincorporated King County',
'Queen Anne':'Maple Valley'},
'Defendant Gender':{'M':'Male','F':'Female'},
'Defendant Race':{'Unknown':'Unknown/Other',
'American Indian':'American Indian or Alaskan Native'},
'Case Type':{'Infraction Non-traffic':'Infraction Non-Traffic'}},inplace=True)
kcso_citations.loc[kcso_citations['Defendant Race'] == 'Hispanic','Defendant Ethnicity'] = 'Hispanic/Latino'
kcso_citations.loc[kcso_citations['Defendant Race'] != 'Hispanic','Defendant Ethnicity'] = 'Unknown'
kcso_citations['Law Description'] = kcso_citations['Law Description'].str.title()
kcso_citations.replace({'Law Description':{'Requirement Re Bicycle Helmet':'Bicycle Helmet Required',
'Bicycle Helmet Violation':'Bicycle Helmet Required',
'Bicycle Helmets':'Bicycle Helmet Required',
'Helmet Required':'Bicycle Helmet Required',
'Fail To Wear Bicycle Helmet':'Bicycle Helmet Required',
' Fail To Wear Bicycle Helmet':'Bicycle Helmet Required',
'Ride Bike W/O Helmet':'Bicycle Helmet Required',
'Require Bicycle Helmet Guardian Of':'Bicycle Helmet Required (Guardian of Minor)',
' Trail Use Violation':'Trail Use Violation',
'Pedestrian On Roadway Unlawfully':'Pedestrian on Roadway Unlawfully'}},inplace=True)
kcso_citations.replace({'Law Code':{'9.10.010.A':'King County Health Code 9.10.010A or 9.15.010',
'9.15.010':'King County Health Code 9.10.010A or 9.15.010',
'9.10.010.B':'King County Health Code 9.10.010B',
' KCC.7.12.295':'King County Code 7.12.295 or 7.12.295.A',
'KE9.05.450':'Kenmore Municipal Code 9.05.450 (partially adopts KCHC Title 9 by reference)',
'BE11.60.090':'Bellevue City Code 11.60.090',
' BE11.60.090':'Bellevue City Code 11.60.090',
'M8.15.030':'Maple Valley Municipal Code 8.15.030 or 8.15.030A',
'MVO.8.15.030A':'Maple Valley Municipal Code 8.15.030 or 8.15.030A',
'M8.15.030A':'Maple Valley Municipal Code 8.15.030 or 8.15.030A',
'46.61.250.1':'RCW 46.61.250.1'}},inplace=True)
# identify records already present in existing data
# extract columns to add to (or check with) existing data
existing_indices = [idx for idx, case_num in enumerate(kcso_citations['Case Number']) \
if case_num in kc_citations['Case Number'].values]
existing_kcso_citations = kcso_citations.loc[existing_indices][['Case Number','Law Enforcement Agency','City',
'Officer Badge Number','Officer Name']]
# merge into rest of data (overwrite existing columns)
for idx in existing_kcso_citations.index:
for col_name in existing_kcso_citations.keys()[1:]:
kc_citations.loc[kc_citations['Case Number'] == existing_kcso_citations.loc[idx,'Case Number'],col_name] = \
existing_kcso_citations.loc[idx,col_name]
# identify records not already present in existing data
new_indices = [idx for idx, case_num in enumerate(kcso_citations['Case Number']) \
if case_num not in kc_citations['Case Number'].values]
new_kcso_citations = kcso_citations.loc[new_indices]
# add missing columns
new_kcso_citations['Court Name'] = "Unknown (King County Sheriff's Office records)"
new_kcso_citations['Disposition'] = 'Unknown'
new_kcso_citations['Originating Query'] = 'Helmet violations only (requested KCHC 9.10.010 and 9.15.010)'
# add to rest of data
kc_citations = pd.merge(kc_citations,new_kcso_citations,how='outer')
stmc_citations = pd.read_excel(data_dir + '2020-08-06 - SeaTac Municipal Court helmet infraction data - transcribed copy.xlsx',
header=0)
# redact identifying defendant data and unneeded columns
stmc_citations = stmc_citations.drop(columns=['Defendant Name'])
# identify records not already present in existing data
new_indices = [idx for idx, case_num in enumerate(stmc_citations['Case Number']) \
if case_num not in kc_citations['Case Number'].values]
stmc_citations = stmc_citations.loc[new_indices]
# display(stmc_citations)
# approximate violation date as case file date
stmc_citations['Violation Date'] = stmc_citations['Case File Date'].copy()
# add missing columns
stmc_citations['Court Name'] = 'SeaTac Municipal Court (via PDR)'
stmc_citations['Law Enforcement Agency'] = "King County Sheriff's Office / SeaTac Police Department"
stmc_citations['Case Type'] = 'Infraction'
stmc_citations['Disposition'] = 'Unknown'
stmc_citations['Defendant Gender'] = 'Unknown'
stmc_citations['Defendant Race'] = 'Unknown/Other'
stmc_citations['Defendant Ethnicity'] = 'Unknown'
stmc_citations['Originating Query'] = 'Helmet violations only (requested KCHC 9.10.010 and 9.15.010, STMC 9.30.020 and 9.30.060)'
# add to rest of data
kc_citations = pd.merge(kc_citations,stmc_citations,how='outer')
fwmc_citations = pd.read_excel(data_dir + '2020-08-05 - Federal Way Municipal Court helmet infraction data (2010-06-14 to 2014-06-08).xlsx',
header=0,parse_dates=[0])
# match missing citation numbers where possible; otherwise, assign unique citation numbers with "Unknown_FWMC" in their names
unknown_counter = 1
for idx in fwmc_citations.index:
if fwmc_citations.loc[idx,'Citation Number'] == 'Unknown':
citation_match = logical_and.reduce((kc_citations['Court Name'] == 'Federal Way Municipal Court',
kc_citations['Violation Date'] == fwmc_citations.loc[idx,'Date Issued'],
kc_citations['Defendant Name'].str.split(',').str[0] \
== fwmc_citations.loc[idx,'Defendant'].split(',')[0]))
if sum(citation_match) > 0:
fwmc_citations.loc[idx,'Citation Number'] = kc_citations.loc[where(citation_match),'Case Number'].values
else:
fwmc_citations.loc[idx,'Citation Number'] = f'Unknown_FWMC_Assigned_{unknown_counter:03}'
unknown_counter += 1
# redact identifying defendant data and unneeded columns
fwmc_citations = fwmc_citations.drop(columns=['Defendant'])
# also redact identifying defendant data from AOC records
kc_citations = kc_citations.drop(columns=['Defendant Name'])
# identify records not already present in existing data
new_indices = [idx for idx, case_num in enumerate(fwmc_citations['Citation Number']) \
if case_num not in kc_citations['Case Number'].values]
fwmc_citations = fwmc_citations.loc[new_indices]
# display(fwmc_citations)
# drop columns with missing/poor/unneeded data
fwmc_citations = fwmc_citations.drop(columns=['Race/Gender','Citation Location','Officer','Unnamed: 8'])
# standardize column names
fwmc_citations = fwmc_citations.rename(columns={'Date Issued':'Violation Date',
'Offense Description':'Law Description',
'Citation Number':'Case Number',
'Badge #':'Officer Badge Number'})
# standardize data
fwmc_citations.replace({'Law Description':{'RIDING BICYCLE W/O HELMET':'Bicycle Helmet Required'}},inplace=True)
# add missing columns
fwmc_citations['Court Name'] = 'Federal Way Municipal Court (via PDR)'
fwmc_citations['Law Enforcement Agency'] = 'Federal Way Police Department'
fwmc_citations['Case Type'] = 'Infraction'
fwmc_citations['Law Code'] = 'Federal Way Revised Code 8.25.060'
fwmc_citations['Disposition'] = 'Unknown'
fwmc_citations['Defendant Gender'] = 'Unknown'
fwmc_citations['Defendant Race'] = 'Unknown/Other'
fwmc_citations['Defendant Ethnicity'] = 'Unknown'
fwmc_citations['Originating Query'] = 'Helmet violations only (requested FWRC 8.25.060)'
# add to rest of data
kc_citations = pd.merge(kc_citations,fwmc_citations,how='outer')
kmc_citations = pd.read_excel(data_dir + '2020-07-28 - Kent Municipal Court helmet infraction data (2007-10-30 to 2020-06-15).xlsx',
header=4,parse_dates=[0,6,12])
# nullify invalid date entries
kmc_citations.loc[kmc_citations['Dispo Date'] < datetime(1990,1,1),'Dispo Date'] = NaN
# fill duplicate rows with valid data from above or below, then remove duplicates
kmc_citations['Official Officer Badge Number'].fillna(method='ffill',axis=0,limit=1,inplace=True)
kmc_citations['Person Race'].fillna(method='bfill',axis=0,limit=1,inplace=True)
kmc_citations['Person Gender'].fillna(method='bfill',axis=0,limit=1,inplace=True)
kmc_citations = kmc_citations.drop_duplicates().reset_index(drop=True)
# redact identifying defendant data and unneeded columns
kmc_citations.drop(columns=['Case Def Name','Unnamed: 2','Unnamed: 9','Unnamed: 11'],inplace=True)
# identify records not already present in existing data
new_indices = [idx for idx, case_num in enumerate(kmc_citations['Case #']) \
if case_num not in kc_citations['Case Number'].values]
kmc_citations = kmc_citations.loc[new_indices]
# standardize column names
kmc_citations = kmc_citations.rename(columns={'Filed Date':'Case File Date',
'Case #':'Case Number',
'Type':'Case Type',
'LEA':'Law Enforcement Agency',
'Official Officer Badge Number':'Officer Badge Number',
'Viol Date':'Violation Date',
'Person Race':'Defendant Race',
'Person Gender':'Defendant Gender',
'Dispo Date':'Disposition Date',
'Dispo':'Disposition'})
# standardize data
kmc_citations.replace({'Case Type':{'IN':'Infraction',
'IT':'Infraction Traffic'},
'Law Enforcement Agency':{'KNP':'Kent Police Department'},
'Defendant Race':{'Asian':'Asian or Pacific Islander'},
'Disposition':{'C':'Committed',
'P':'Paid',
'D':'Dismissed'}},inplace=True)
# add missing columns
kmc_citations['Court Name'] = 'Kent Municipal Court (via PDR)'
kmc_citations['Law Code'] = 'Kent City Code 9.41.030'
kmc_citations['Law Description'] = 'Bicycle Helmet Required'
kmc_citations['Defendant Ethnicity'] = 'Unknown'
kmc_citations['Originating Query'] = 'Helmet violations only (requested KCHC 9.10.010 and 9.15.010, KCC 9.41.030 and 9.41.070)'
# add to rest of data
kc_citations = pd.merge(kc_citations,kmc_citations,how='outer')
for row_idx in kc_citations.index:
ofc_name = kc_citations.loc[row_idx,'Officer Name']
if type(ofc_name) == float: # NaN value
pass
else:
ofc_name = ofc_name.replace('.',' ')
if ',' in ofc_name: # assume single comma and order: last, first (middle)
last, first_middle = ofc_name.split(',')
first_middle = first_middle.split()
if len(first_middle) == 1:
first = first_middle[0]
middle = NaN
elif len(first_middle) == 2:
if len(first_middle[1]) == 1:
first, middle = first_middle
else:
first = first_middle[0] + ' ' + first_middle[1]
middle = NaN
else:
print('ERROR: found first + middle name with over 2 components')
else: # assume order: first (middle) last
components = ofc_name.split()
if len(components) == 2:
first, last = components
middle = NaN
elif len(components) == 3:
if len(components[1]) == 1:
first, middle, last = components
else: # ambiguous if 2nd component is part of first or last name; assume two-part first name
first = components[0] + ' ' + components[1]
last = components[2]
middle = NaN
elif len(components) == 4:
if len(components[1]) == 1:
first, middle, last1, last2 = components
last = last1 + ' ' + last2
elif len(components[2]) == 1:
first1, first2, middle, last = components
first = first1 + ' ' + first2
else: # ambiguous
first1, first2, last1, last2 = components
first = first1 + ' ' + first2
last = last1 + ' ' + last2
middle = NaN
else:
print('ERROR: found name with 1 component or over 4 components')
kc_citations.loc[row_idx,'Officer First Name'] = first
kc_citations.loc[row_idx,'Officer Middle Name'] = middle
kc_citations.loc[row_idx,'Officer Last Name'] = last
# remove original, inconsistently-formatted name column
kc_citations.drop(columns=['Officer Name'],inplace=True)
# load Seattle Municipal Court infraction records from 2003-2016 (incomplete due to purge schedule)
smc_citations_2003_2016 = pd.read_excel(data_dir + '2020-07-20 - Seattle Municipal Court bicycle violations (2003-01-01 to 2016-12-31).xlsx',
index_col=None,parse_dates=[0,1])
# load Seattle Municipal Court infraction records from 2017-2020
smc_citations_2017_2020 = pd.read_excel(data_dir + '2020-07-16 - Seattle Municipal Court bicycle violations (2017-01-01 to 2020-06-30).xlsx',
index_col=None,parse_dates=[0,1])
# fix misnamed columns in records from 2003-2016
temp_officer_number = smc_citations_2003_2016['Officer First Name'].copy()
temp_first_name = smc_citations_2003_2016['Officer Middle Initial'].copy()
temp_middle_initial = smc_citations_2003_2016['Officer Last Name'].copy()
temp_last_name = smc_citations_2003_2016['Officer Number'].copy()
smc_citations_2003_2016['Officer First Name'] = temp_first_name
smc_citations_2003_2016['Officer Middle Initial'] = temp_middle_initial
smc_citations_2003_2016['Officer Last Name'] = temp_last_name
smc_citations_2003_2016['Officer Number'] = temp_officer_number
# merge old and new data
smc_citations = pd.concat([smc_citations_2003_2016,smc_citations_2017_2020]).copy().reset_index(drop=True)
# remove duplicate entries (same defendant, same violation)
# but retain different violations (e.g. helmet and light violations) issued to same defendant under same citation number
# NOTE: this will impact analyses of officers, since it seems most (if not all) duplicates list two different officers
smc_citations = smc_citations.drop_duplicates(subset=['Citation Number','Violation Code'],keep='first').reset_index(drop=True)
# drop unneeded columns
smc_citations.drop(columns=['Violation Location'],inplace=True)
# load Seattle Municipal Court infraction records from 2003-2021
# (including charge amounts and status, plus more defendant data)
smc_batch2_pt1 = pd.read_excel(data_dir + '2021-01-28 - Seattle Municipal Court bicycle violations (2003-01-01 to 2016-12-31).xlsx',
index_col=None,parse_dates=[12,13])
smc_batch2_pt2 = pd.read_excel(data_dir + '2021-02-16 - Seattle Municipal Court bicycle violations (2017-01-01 to 2021-02-16).xlsx',
index_col=None,parse_dates=[12,13])
smc_citations_batch2 = pd.concat([smc_batch2_pt1,smc_batch2_pt2]).copy().reset_index().drop(columns='index')
smc_citations_batch2.rename(columns={'Race Code':'Defendant Race','Gender Code':'Defendant Gender'},inplace=True)
# drop unneeded columns and standardize column names
smc_citations_batch2.drop(columns=['Case Number','City','State','Zip Code',
'Defendant First Name','Defendant Last Name',
'Violation Location','In Collection','Obligation Number'],inplace=True)
# remove duplicate entries (same defendant, same violation) – SEE DETAILS ABOVE IN SIMILAR LINE
smc_citations_batch2 = smc_citations_batch2.drop_duplicates(subset=['Citation Number','Violation Code'],keep='first').reset_index(drop=True)
# fix financial charge information, based on information from Gary Ireland, SMC PIO:
# "Instances where the Obligation Amount Outstanding is either 0 or less than the difference
# between the Assessed and Paid amounts, is caused by waiver or write-off by a judge for
# one reason or another."
# this appears to be accurate for all citations issued ("Violation Date") in 2011 onwards;
# however, inspection of the data shows nearly all records prior to 2011 have a zero value for
# "Obligation Amount Outstanding," which is highly suspicious. thus, we infer:
# - prior to 2011: AR Ordered Amount = "Amount Assessed"
# AR Adjusted Amount = NaN (no data available)
# - from 2011 onwards: AR Ordered Amount = "Amount Assessed" (NOTE: this doesn't seem to include late/default penalties)
# AR Adjusted Amount = MIN("Amount Assessed", "Amount Paid"+"Amount Outstanding")
smc_citations_batch2.loc[smc_citations_batch2['Violation Date'].dt.year < 2011,'AR Adjusted Amount'] = NaN
smc_citations_batch2['TEMP - paid plus outstanding'] \
= smc_citations_batch2['Obligation Amount Paid'] + \
smc_citations_batch2['Obligation Amount Outstanding']
smc_citations_batch2['TEMP - inferred adjusted amount'] \
= smc_citations_batch2[['Obligation Amount Assessed','TEMP - paid plus outstanding']].min(axis=1)
smc_citations_batch2.loc[smc_citations_batch2['Violation Date'].dt.year >= 2011,'AR Adjusted Amount'] \
= smc_citations_batch2.loc[smc_citations_batch2['Violation Date'].dt.year >= 2011,'TEMP - inferred adjusted amount']
smc_citations_batch2.drop(columns=['TEMP - paid plus outstanding',
'TEMP - inferred adjusted amount'],inplace=True)
smc_citations_batch2 \
= smc_citations_batch2.rename(columns={'Obligation Amount Assessed':'AR Ordered Amount',
'Obligation Amount Paid':'AR Paid Amount',
'Obligation Amount Outstanding':'AR Due Amount'})
# merge two batches of SMC records
# NOTE: see below for "fix" to merge duplicate rows with slightly different missing (NaN) columns
smc_citations = pd.merge(smc_citations,smc_citations_batch2,how='outer')
# merge date and time columns
smc_citations['Violation Datetime'] = smc_citations['Violation Date'] \
+ pd.to_timedelta(floor(smc_citations['Violation Time']/100),'h') \
+ pd.to_timedelta(smc_citations['Violation Time'] - 100*floor(smc_citations['Violation Time']/100),'m')
# drop unneeded columns
smc_citations.drop(columns=['Violation Time'],inplace=True)
# handle entries missing Defendant Race information
smc_citations['Defendant Race'] = smc_citations['Defendant Race'].fillna('U')
# clean up officer badge numbers
smc_citations['Officer Number'] = smc_citations['Officer Number'].replace(9999,NaN)
# fix previous merge by merging duplicate rows with slightly different missing (NaN) columns
smc_citations = smc_citations.groupby(['Citation Number','Violation Code'])\
.apply(lambda x: x.fillna(method='ffill').\
fillna(method='bfill').\
drop_duplicates()).\
reset_index(drop=True)
# display(smc_citations)
# standardize column names
smc_citations = smc_citations.rename(columns={'Filing Date':'Case File Date',
'Citation Number':'Case Number',
'Violation Code':'Law Code',
'Violation Description':'Law Description',
'Officer Middle Initial':'Officer Middle Name',
'Officer Number':'Officer Badge Number'})
# standardize data
smc_citations['Law Description'] = smc_citations['Law Description'].str.title()
smc_citations['Officer First Name'] = smc_citations['Officer First Name'].str.title()
smc_citations['Officer Last Name'] = smc_citations['Officer Last Name'].str.title()
smc_citations['Law Code'] = [old_str.replace('SMC','Seattle Municipal Code ') if old_str[:3] == 'SMC' else old_str \
for old_str in smc_citations['Law Code']]
smc_citations['Law Code'] = ['Seattle Municipal Code ' + old_str if old_str[:5] == '11.44' else old_str \
for old_str in smc_citations['Law Code']]
smc_citations.replace({'Law Code':{'R9.15.010':'King County Health Code 9.10.010A or 9.15.010',
'KCHC9.10.010':'King County Health Code 9.10.010A or 9.15.010', # assuming section A
'H9.10.010':'King County Health Code 9.10.010A or 9.15.010'}, # assuming section A
'Law Description':{'Rights And Duties Of Riders':'Bicycle Rider Rights/Duties Violation',
'Bike Rider Rights/Duties Violation':'Bicycle Rider Rights/Duties Violation',
'Bike, Lamps/Reflectors':'Bicycle No/Improper Lamps/Reflectors',
'Bike, R/W Xwalk':'Bicycle Yield Right of Way in Crosswalk',
'Bike R/W In Crosswalk':'Bicycle Yield Right of Way in Crosswalk',
'Bike, Ride On Road':'Bicycle Riding on Roadways Rules Violation',
'Bike Riding On Road Rules Viol':'Bicycle Riding on Roadways Rules Violation',
'Bike No/Improper Lamps/Reflectors':'Bicycle No/Improper Lamps/Reflectors',
'Bike Unsafe Pass On Right':'Bicycle Unsafe Pass on Right',
'Bike Yield R/W On Public Path':'Bicycle Riding on Sidewalk/Public Path Rules Violation',
'Bike, Yield R/W':'Bicycle Riding on Sidewalk/Public Path Rules Violation',
'Bike, Pass, Right':'Bicycle Unsafe Pass on Right',
'Bike No/Improper Hand Signals':'Bicycle No/Improper Hand Signals',
'Bike, Hand Signals':'Bicycle No/Improper Hand Signals',
'Bike, No Brakes':'Bicycle No/Improper Brakes',
'Bike No/Improper Brakes':'Bicycle No/Improper Brakes',
'Bike, Control':'Bicycle Control One Hand Minimum Required',
"Bike Control One Hand Min Req'D":'Bicycle Control One Hand Minimum Required',
'Bike, Clinging To Veh':'Bicycle Clinging to Vehicle',
'Bike Clinging/Attaching To Vehicle':'Bicycle Clinging to Vehicle',
'Bike, Excess Passengers':'Bicycle Excess Passengers',
'Bike, Fail To Ride On Seat':'Bicycle Failure to Ride on Seat',
'Bike, Responsibility - Child/Parent':'Bicycle Parent of Child Responsibility',
'Bike Ride >2 Abreast Road/Sidewalk':'Bicycle Riding More Than 2 Abreast'},
'Defendant Race':{'W':'White','B':'Black','A':'Asian or Pacific Islander',
'I':'American Indian or Alaskan Native','U':'Unknown/Other'},
'Defendant Gender':{'M':'Male','F':'Female','U':'Unknown'}},inplace=True)
# add missing columns
smc_citations['Court Name'] = 'Seattle Municipal Court (via PDR)'
smc_citations['Case Type'] = 'Infraction'
smc_citations['Law Enforcement Agency'] = 'Seattle Police Department'
smc_citations['Disposition'] = 'Unknown'
smc_citations['Defendant Ethnicity'] = 'Unknown'
smc_citations['Originating Query'] = 'All bike violations (requested KCHC 9.10.010 and 9.15.010, SMC 11.44)'
# for third time, remove duplicate entries (same defendant, same violation) – SEE DETAILS ABOVE IN SIMILAR LINE
# this is necessary because "Violation Code" column was not yet homogenized above
smc_citations = smc_citations.drop_duplicates(subset=['Case Number','Law Code'],keep='first').reset_index(drop=True)
# identify records not already present in existing data
# (NOTE: for SMC data, all records are not already present, so this is a formality)
new_indices = [idx for idx, case_num in enumerate(smc_citations['Case Number']) \
if case_num not in kc_citations['Case Number'].values]
smc_citations = smc_citations.loc[new_indices]
# add to rest of data
kc_citations = pd.merge(kc_citations,smc_citations,how='outer')
def assign_city(city_name,lea_is=None,lea_contains=None,law_contains=None,court_contains=None):
# boolean mask for rows without a town/city/jurisdiction assignment
city_unassigned_mask = kc_citations['City'].apply(lambda x: not isinstance(x, str))
# assign town/city/jurisdiction based on LEA and other criteria
if lea_is is not None:
lea_mask = kc_citations['Law Enforcement Agency'] == lea_is
elif lea_contains is not None:
lea_mask = kc_citations['Law Enforcement Agency'].str.contains(lea_contains)
if law_contains is not None:
assign_mask = logical_and.reduce((city_unassigned_mask,lea_mask,
kc_citations['Law Code'].str.contains(law_contains)))
else:
assign_mask = logical_and(city_unassigned_mask,lea_mask)
if court_contains is not None:
assign_mask = logical_and(assign_mask,kc_citations['Court Name'].str.contains(court_contains))
kc_citations.loc[assign_mask,'City'] = city_name
# near-certain assignments made based on law enforcement agency, court, and local code referenced
assign_city('Pacific',lea_is='Algona Police Department',law_contains='Pacific Municipal Code')
assign_city('Algona',lea_is='Algona Police Department')
assign_city('Auburn',lea_is='Auburn Police Department')
assign_city('Bellevue',lea_is='Bellevue Police Department')
assign_city('Black Diamond',lea_is='Black Diamond Police Department')
assign_city('Bothell',lea_is='Bothell Police Department')
assign_city('Burien',lea_contains='Burien Police Department')
assign_city('Covington',lea_contains='Covington Police Department')
assign_city('Des Moines',lea_is='Des Moines Police Department')
assign_city('Enumclaw',lea_is='Enumclaw Police Department')
assign_city('Federal Way',lea_is='Federal Way Police Department')
assign_city('Issaquah',lea_is='Issaquah Police Department')
assign_city('Kent',lea_is='Kent Police Department')
assign_city('Kirkland',lea_is='Kirkland Police Department')
assign_city('Lake Forest Park',lea_is='Lake Forest Park Police Department')
assign_city('Mercer Island',lea_is='Mercer Island Police Department')
assign_city('Pacific',lea_is='Pacific Police Department')
assign_city('Redmond',lea_is='Redmond Police Department')
assign_city('Renton',lea_is='Renton Police Department')
assign_city('Sammamish',lea_contains='Sammamish Police Department')
assign_city('SeaTac',lea_contains='SeaTac Police Department')
assign_city('Seattle',lea_is='Seattle Police Department')
assign_city('Seattle',lea_is='University Of Washington Police Department')
assign_city('Seattle',lea_is='Port of Seattle Police Department')
assign_city('Tukwila',lea_is='Tukwila Police Department')
assign_city('Kirkland',lea_contains='Woodinville Police Department',court_contains='Kirkland Municipal Court')
assign_city('Woodinville',lea_contains='Woodinville Police Department')
assign_city('North Bend',lea_is="King County Sheriff's Office / North Bend Police Department")
assign_city('Issaquah',lea_is='Snoqualmie/North Bend Police Department',law_contains='Issaquah Municipal Code')
assign_city('Snoqualmie',lea_is='Snoqualmie/North Bend Police Department',law_contains='Snoqualmie Municipal Code')
assign_city('Kenmore',lea_contains='Kenmore Police Department')
assign_city('Shoreline',lea_contains='Shoreline Police Department')
assign_city('Maple Valley',lea_contains='Maple Valley Police Department')
assign_city('Maple Valley',lea_is="King County Sheriff's Office",law_contains='Maple Valley Municipal Code')
# ambiguous assignments
assign_city('Snoqualmie',lea_is='Snoqualmie/North Bend Police Department') # 6 citations (could be Snoqualmie or North Bend)
assign_city('Unincorporated King County',lea_is='Washington State Patrol') # 10 citations without any city ties (all within KCDC jurisdiction)
assign_city('Unincorporated King County',lea_is="King County Sheriff's Office") # 81 citations without any city ties (all within KCDC jurisdiction)
# [to save] lookup code (similar to count_values but does not exclude NaN values)
# kc_citations.loc[kc_citations['Law Enforcement Agency'].str.contains('Maple Valley')]\
# .groupby(['Court Name','Law Enforcement Agency','Law Code','City'],dropna=False,as_index=False).size()
kc_citations.to_excel(data_dir + '2021-10-18 - compiled King County bike citation records.xlsx')