#!/usr/bin/env python # coding: utf-8 # # Compiling, homogenizing, and quality-controlling King County bike-related citation records # # ##### Ethan C. Campbell, for Central Seattle Greenways / Helmet Law Working Group # # For questions, contact me at ethanchenbell@gmail.com. # # For analysis of the data compiled below, refer to the companion notebook on my GitHub: [***csg_analyze_king_county_bike_citations.ipynb***](https://github.com/ethan-campbell/Miscellaneous/blob/master/csg_analyze_king_county_bike_citations.ipynb). # #### Import packages and set file system # In[333]: get_ipython().run_line_magic('matplotlib', 'inline') from numpy import * import pandas as pd import matplotlib.pyplot as plt import matplotlib.dates as mdates import matplotlib as mpl # mpl.rcParams['figure.dpi'] = 300 # turn on for higher-quality figure export from datetime import datetime, timedelta import platform import warnings import sys from IPython.core.display import display, HTML # display(HTML("")) # choose root directory for data files if platform.system() == 'Darwin': data_dir = '/Users/Ethan/Documents/Finances and records/2020-06-30 - Helmet Law Working Group/Data/' elif platform.system() == 'Linux': data_dir = '/dat1/ethancc/CSG/' # filepaths aoc_dir = data_dir + '2021-06-22 - WA State Administrative Office of the Courts data/' # #### Load and merge bike citation records from WA State Administrative Office of the Courts (AOC) # In[334]: # load bike infraction data from WA State Administrative Office of the Courts # note: "KCDC" = King County District Court # "KCMC" / "MCs" = municipal courts in King County kcdc_citations = pd.read_excel(aoc_dir + '210419-000051_KCDC_Bicycle_Viols - updated.xlsx',header=2,skipfooter=1) kcmc_citations = pd.read_excel(aoc_dir + '210419-000051_KCMC_Bicycle_Viols - updated.xlsx',header=3,sheet_name='Bicycle Charges') kcmc_lea_data = pd.read_excel(aoc_dir + '210419-000051_KCMC_Bicycle_Viols - updated.xlsx',header=2,sheet_name='LEA') kcmc_lfo_data = pd.read_excel(aoc_dir + '210419-000051_KCMC_Bicycle_Viols - updated.xlsx',header=2,sheet_name='LFOs') # merge in data on officers, badge numbers, and accounts receivables (AR) charges (all provided for municipal courts only) kcmc_lea_data = kcmc_lea_data.drop(columns=['Court Name','Case Token','Case LEA Name']) kcmc_citations = kcmc_citations.merge(kcmc_lea_data,how='left',on='Case Number') kcmc_lfo_data = kcmc_lfo_data.drop(columns=['Court Name','Case Token']) kcmc_citations = kcmc_citations.merge(kcmc_lfo_data,how='left',on='Case Number') # standardize column names kcdc_citations = kcdc_citations.rename(columns={'Case Key':'Case Key (KCDC) or Token (MCs)', 'Case Type Code & Desc':'Case Type', 'Case LEA':'Law Enforcement Agency', 'Charge Violation Date':'Violation Date', 'Charge Law Number':'Law Code', 'Charge Law':'Law Description', 'Charge Result':'Disposition', 'Charge Result Date':'Disposition Date', 'Case Participant Actor Name':'Defendant Name', 'Actor Person Gender':'Defendant Gender', 'Actor Person Race':'Defendant Race', 'Actor Person Ethnicity':'Defendant Ethnicity'}) kcmc_citations = kcmc_citations.rename(columns={'Case Token':'Case Key (KCDC) or Token (MCs)', 'Case LEA Name':'Law Enforcement Agency', 'Case Citation Charge Date':'Violation Date', 'Charge Law Number':'Law Code', 'Charge':'Law Description', 'Charge Disposition':'Disposition', 'Charge Disposition Date':'Disposition Date', 'Person Name':'Defendant Name', 'Person Gender':'Defendant Gender', 'Person Race':'Defendant Race', 'Person Ethnicity':'Defendant Ethnicity', 'Official Name':'Officer Name', 'Official Officer Badge Number':'Officer Badge Number'}) # redact identifying defendant data and unneeded columns # NOTE: keep only Federal Way Municipal Court defendant names to allow checking for duplicate citations with separate FWMC data kcdc_citations = kcdc_citations.drop(columns=['#Age at Viol Dt','Case Participant Type Code']) kcmc_citations = kcmc_citations.drop(columns=['#Age at Viol Dt','Specific Participant Type Code']) kcdc_citations['Defendant Name'] = NaN kcmc_citations.loc[kcmc_citations['Court Name'] != 'FEDERAL WAY MUNICIPAL COURT','Defendant Name'] = NaN # merge KCDC and MC data kc_citations = pd.concat([kcmc_citations,kcdc_citations],ignore_index=True) # flag all as originating from query for helmet and non-helmet bike violations kc_citations['Originating Query'] = 'All bike violations (requested KCHC 9.10.010 and 9.15.010, RCW sections, WAC sections, and local code chapters)' # #### Remove records that are not bike-related # In[335]: # remove one citation under King County Code 14A.40.010d (requiring motorized foot scooter users to wear a helmet) kc_citations = kc_citations[kc_citations['Law Code'] != 'kcc14a.40.010.d'] # remove three concurrent citations for single person by North Bend Police Department, two of which are "bicycle" citations # but the third of which is under NBMC 10.20.030 (requiring motorized foot scooter users to wear a helmet) kc_citations = kc_citations[kc_citations['Case Number'] != '4Z0454185'] # remove one citation under Kent Municipal Code 9.44.050 (requiring motorized foot scooter users to wear a helmet) kc_citations = kc_citations[kc_citations['Law Code'] != '9.44.050'] # #### Standardize and homogenize data # In[336]: kc_citations['Court Name'] = kc_citations['Court Name'].str.title() kc_citations.replace({'Court Name':{'King District Court - Kcdc':'King County District Court', 'Kcdc Auburn Courthouse':'King County District Court', 'Seatac Municipal Court':'SeaTac Municipal Court'}},inplace=True) kc_citations.replace({'Case Type':{'IN - Infraction':'Infraction', 'CR - Criminal':'Criminal', "Defendant's race":'Infraction', 'Limited to the defendant':'Infraction', 'Calculated by days between defendant birth date and charge violation date':'Infraction', 'defendant gender':'Infraction'}},inplace=True) kc_citations['Law Enforcement Agency'] = kc_citations['Law Enforcement Agency'].str.title() kc_citations.replace({'Law Enforcement Agency':{'Maple Valley Police Department':"King County Sheriff's Office / Maple Valley Police Department", 'Burien Police Department':"King County Sheriff's Office / Burien Police Department", 'Covington Police Department':"King County Sheriff's Office / Covington Police Department", 'Kenmore City Of':"King County Sheriff's Office / Kenmore Police Department", 'Woodinville Police Department':"King County Sheriff's Office / Woodinville Police Department", 'Sammamish Police Department':"King County Sheriff's Office / Sammamish Police Department", 'King Co Police Department':"King County Sheriff's Office", 'Lake Forest Park Police Dept':'Lake Forest Park Police Department', 'Port Of Seattle Police Department':'Port of Seattle Police Department', 'Seatac Police Department':"King County Sheriff's Office / SeaTac Police Department", 'Shoreline City Of':"King County Sheriff's Office / Shoreline Police Department", 'University Of Washington Police':'University Of Washington Police Department', 'Wa St Patrol':'Washington State Patrol'}},inplace=True) kc_citations.loc[logical_and(kc_citations['Law Enforcement Agency'] == 'North Bend Police Department', kc_citations['Violation Date'] <= datetime(2014,3,8)), 'Law Enforcement Agency'] = "King County Sheriff's Office / North Bend Police Department" kc_citations.loc[logical_and(kc_citations['Law Enforcement Agency'] == 'North Bend Police Department', kc_citations['Violation Date'] > datetime(2014,3,8)), 'Law Enforcement Agency'] = "Snoqualmie/North Bend Police Department" kc_citations.loc[logical_and(kc_citations['Law Enforcement Agency'] == 'Snoqualmie Police Department', kc_citations['Violation Date'] > datetime(2014,3,8)), 'Law Enforcement Agency'] = "Snoqualmie/North Bend Police Department" kc_citations['Law Description'] = kc_citations['Law Description'].str.title() kc_citations.replace({'Law Description':{'Bicycle Helmet Regulations':'Bicycle Helmet Required', 'Bicycle Helmet Violation':'Bicycle Helmet Required', 'Bicycle Helmets':'Bicycle Helmet Required', 'Fail To Wear Bicycle Helmet':'Bicycle Helmet Required', 'Helmet Required':'Bicycle Helmet Required', 'Helmet Required Riding Bicycle':'Bicycle Helmet Required', 'Helmet Required When Riding Bicyle':'Bicycle Helmet Required', 'No Bicycle Helmet':'Bicycle Helmet Required', 'No Bicyle Helmet':'Bicycle Helmet Required', 'Require Bicycle Helmet Guardian Of':'Bicycle Helmet Required (Guardian of Minor)', 'Requirement Re Bicycle Helmet':'Bicycle Helmet Required', 'Riding Bicycle W/O Helmet':'Bicycle Helmet Required', 'Riding Bicycle W/Out Helmet':'Bicycle Helmet Required', 'Riding Bicycle Without Helmet':'Bicycle Helmet Required', 'Bicycle Fail To Yield':'Bicycle Failure to Yield', 'Bicycle Leaving Curb X-Walk Spd Zn':'Bicycle Leaving Curb or Crosswalk in Speed Zone', 'No Bicycle Lights As Required':'Bicycle Lights Required', 'Bicycle Riding On Sidewalk':'Bicycle Riding on Sidewalk', 'Bicycle Riding On Sidewalks':'Bicycle Riding on Sidewalk', 'Defective Equipment-Bicycle':'Bicycle Defective Equipment', 'Bicycle-Fail To Stop At Stop Sign':'Bicycle Failure to Stop at Stop Sign', 'Bicycle-Negligence':'Bicycle Negligence', 'Bicycles Obedience To Traffic Dev':'Bicycle Obedience to Traffic Control Devices', 'Bicycles Ride On Right Side':'Riding Bicycle on Right Side', 'Bike On Sidewalk In Business Dist':'Bicycle on Sidewalk in Business District', 'Carrying Articles On Bicycle':'Carrying Articles on Bicycle', 'Fld To Give Hand Signals/Bike':'Bicycle Failure to Give Hand Signals', 'Improper Operation Of Bicycle':'Improper Operation of Bicycle', 'No Bicycles On Skate Park':'No Bicycles on Skate Park', 'Ride Bicycle On Transit Property':'Riding Bicycle on Transit Property', 'Riding Bicycle On Sidewalk/Path':'Riding Bicycle on Sidewalk or Path', 'Riding Bicycles On Right Side':'Riding Bicycle on Right Side', 'Speeding On Bicycle Trail':'Speeding on Bicycle Trail', 'Unlawful Riding On Bicycle':'Unlawful Riding on Bicycle', 'Violating Laws While Riding Bike':'Violating Laws While Riding Bicycle'}},inplace=True) kc_citations.loc[logical_and(kc_citations['Law Code'] == '46.61.780', kc_citations['Law Description'] == 'Bicycle Violation'), 'Law Description'] = 'Bicycle Defective Equipment' kc_citations.replace({'Disposition':{'Dismissed W/Prejudice':'Dismissed with Prejudice', 'Dismissed W/O Prejudice':'Dismissed without Prejudice', 'Dismissed After Deferral':'Dismissed after Deferral', 'Committed After Deferral':'Committed after Deferral'}},inplace=True) kc_citations.replace({'Defendant Race':{'White (Caucasian)':'White', 'Black (African American)':'Black', 'Asian':'Asian or Pacific Islander', 'Unknown':'Unknown/Other', 'Multiracial':'Unknown/Other'}},inplace=True) kc_citations.replace({'Defendant Ethnicity':{'Hispanic':'Hispanic/Latino', 'Hispanic or Latino':'Hispanic/Latino', 'Non-Hispanic':'Not Hispanic/Latino', 'Not Hispanic or Latino':'Not Hispanic/Latino'}},inplace=True) kc_citations['Officer Name'] = kc_citations['Officer Name'].str.title() combined_officer_strings = [name for name in kc_citations['Officer Name'].value_counts().index if ' ' in name] fixed_officer_strings = [name.split(' ')[0] for name in kc_citations['Officer Name'].value_counts().index if ' ' in name] for ofc_idx, ofc_name in enumerate(combined_officer_strings): kc_citations.replace({'Officer Name':{ofc_name:fixed_officer_strings[ofc_idx]}},inplace=True) kc_citations.replace({'Law Code':{'D.10.12.010(1)':'Des Moines Municipal Code 10.12.010 or 10.12.010(1)', 'ECM1804':'[Enumclaw - no match found] "ECM 1804"', 'ECM1807':'[Enumclaw - no match found] "ECM 1807"', 'MVO.8.15.030A':'Maple Valley Municipal Code 8.15.030 or 8.15.030A', 'M8.15.030':'Maple Valley Municipal Code 8.15.030 or 8.15.030A', 'FWRC8.25.060':'Federal Way Revised Code 8.25.060', 'FWC15.196':'Federal Way Revised Code 8.25.060', 'IQM10.44.025':'Issaquah Municipal Code 10.44.025', 'SNL10.30.050':'Snoqualmie Municipal Code 10.30.050', 'K9.41.030':'Kent City Code 9.41.030', '9.41.030':'Kent City Code 9.41.030', 'LFM9.18.020':'Lake Forest Park Municipal Code 9.18.020', '10.54.080':'Pacific Municipal Code 10.54.080', 'P.10.54.080':'Pacific Municipal Code 10.54.080', 'R6-26(3)(A)':'Renton Municipal Code 6-26-3(A) or 6-26-7', 'R6-26-7':'Renton Municipal Code 6-26-3(A) or 6-26-7', '10.56.185':'Auburn City Code 10.56.185', 'amc 10.56.185':'Auburn City Code 10.56.185', 'amc10.56.185':'Auburn City Code 10.56.185', 'ke9.05.450':'Kenmore Municipal Code 9.05.450 (partially adopts KCHC Title 9 by reference)', 'be11.60.090':'Bellevue City Code 11.60.090', 'bu8.37.050':'Burien Municipal Code 8.37.050 or 8.37.050.1', 'bu8.37.050.1':'Burien Municipal Code 8.37.050 or 8.37.050.1', 'KCHC9.10.010A':'King County Health Code 9.10.010A or 9.15.010', '9.10.010.a':'King County Health Code 9.10.010A or 9.15.010', '9.15.010':'King County Health Code 9.10.010A or 9.15.010', '9.10.010.b':'King County Health Code 9.10.010B'}},inplace=True) kc_citations.replace({'Law Code':{'46.61.780':'RCW 46.61.780', '46.61.755':'RCW 46.61.755', '46.61.770':'RCW 46.61.770', '46.61.758':'RCW 46.61.758', '46.61.775':'RCW 46.61.775', '46.61.760':'RCW 46.61.760', '46.61.235.2B':'RCW 46.61.235.2B', '46.61.235.2b':'RCW 46.61.235.2B', '46.61.235.5-2b':'RCW 46.61.235.2B/5', '308.330.545':'WAC 308-330-545', '308.330.555':'WAC 308-330-555', '308-330-555':'WAC 308-330-555', 'KCC7.12.295.A':'King County Code 7.12.295 or 7.12.295.A', '28.96.010.a4':'King County Code 28.96.010.A4', 'amc 10.56.110':'Auburn City Code 10.56.110', 'amc10.56.110':'Auburn City Code 10.56.110', '10.56.110':'Auburn City Code 10.56.110', '10.56.120':'Auburn City Code 10.56.120', '10.56.140':'Auburn City Code 10.56.140', 'amc 10.56.140':'Auburn City Code 10.56.140', 'LFMORD798.2D':'Lake Forest Park Ordinance 798.2D', 'LFM798.2B':'Lake Forest Park Ordinance 798.2B', 'FWRC8.25.020.1':'Federal Way Revised Code 8.52.010.1', 'smc11.44.120':'Seattle Municipal Code 11.44.120', 'BDM8.28.040':'Black Diamond Municipal Code 8.28.040'}},inplace=True) # #### Load and merge Des Moines Municipal Court records # In[337]: dmmc_citations = pd.read_excel(data_dir + '2020-07-21 - Des Moines Municipal Court helmet infraction data - transcribed.xlsx', header=0) # identify records not already present in existing data new_indices = [idx for idx, case_num in enumerate(dmmc_citations['Case Number']) \ if case_num not in kc_citations['Case Number'].values] dmmc_citations = dmmc_citations.loc[new_indices] display(dmmc_citations) # note: no new records to merge # #### Load and merge Renton Municipal Court records # In[338]: rmc_citations = pd.read_excel(data_dir + '2020-07-22 - Renton Municipal Court helmet infraction data (2007-08-20 to 2020-06-30).xlsx', header=4) # redact identifying defendant data and unneeded columns rmc_citations = rmc_citations.drop(columns=['Case Def Name']) # identify records not already present in existing data new_indices = [idx for idx, case_num in enumerate(rmc_citations['Case #']) \ if case_num not in kc_citations['Case Number'].values] rmc_citations = rmc_citations.loc[new_indices] # display(rmc_citations) # In[339]: # drop columns with missing/poor/unneeded data rmc_citations = rmc_citations.drop(columns=['Unnamed: 2','Type']) # standardize column names rmc_citations = rmc_citations.rename(columns={'Filed Date':'Case File Date', 'Case #':'Case Number', 'LEA':'Law Enforcement Agency', 'Official Officer Badge Number':'Officer Badge Number', 'Viol Date':'Violation Date', 'Person Gender Code':'Defendant Gender', 'Person Race Code':'Defendant Race', 'Person Ethnicity Code':'Defendant Ethnicity', 'Dispo Date':'Disposition Date', 'Dispo':'Disposition'}) # standardize data rmc_citations.replace({'Law Enforcement Agency':{'RPD':'Renton Police Department'}, 'Defendant Gender':{'M':'Male'}, 'Defendant Race':{'W':'White'}, 'Defendant Ethnicity':{'U':'Unknown'}, 'Disposition':{'DO':'Unknown ("DO")', 'C':'Committed'}},inplace=True) # add missing columns rmc_citations['Court Name'] = 'Renton Municipal Court (via PDR)' rmc_citations['Case Type'] = 'Infraction' rmc_citations['Law Code'] = 'Renton Municipal Code 6-26-3(A) or 6-26-7' rmc_citations['Law Description'] = 'Bicycle Helmet Required' rmc_citations['Originating Query'] = 'Helmet violations only (requested RMC 6-26)' # add to rest of data kc_citations = pd.merge(kc_citations,rmc_citations,how='outer') # #### Load and merge King County Sheriff's Office records # # Note: missing cities, towns, and jurisdictions (i.e., unincorporated King County) were filled in by looking up given addresses and cross-streets on Google Maps and the [Unincorporated King County Community Service Areas](https://kingcounty.gov/~/media/depts/local-services/community-service-areas/map-all-areas-detail.ashx?la=en) map prior to loading the spreadsheet below. # In[340]: kcso_citations = pd.read_excel(data_dir + "2020-07-27 - King County Sheriff's Office helmet infraction data (2010-03-16 to 2019-08-22) - missing jurisdictions filled in.xlsx", header=0) # redact identifying defendant data and unneeded columns kcso_citations = kcso_citations.drop(columns=['Age','Height','Weight','Eye Color','Hair Color', 'Officer Notes','Case Number', 'Primary Trafficway','Block Number','Reference Trafficway']) # In[341]: # standardize column names kcso_citations['Violation Date'] = kcso_citations['Date / Time of Incident'].dt.date kcso_citations = kcso_citations.rename(columns={'Date / Time of Incident':'Violation Datetime', 'Ticket Number':'Case Number', 'Citation Type':'Case Type', 'Violation Code':'Law Code', 'Violation Title':'Law Description', 'Agency':'Law Enforcement Agency', 'Badge':'Officer Badge Number', 'Officer':'Officer Name', 'Sex':'Defendant Gender', 'Race':'Defendant Race'}) # split records representing multiple citations into two separate lines kcso_citations = (kcso_citations.set_index([col_name for col_name in kcso_citations.keys() \ if col_name not in ['Law Code','Law Description']]) .apply(lambda col: col.str.split(';').explode()) .reset_index() .reindex(kcso_citations.columns, axis=1)) # standardize data kcso_citations.replace({'Law Enforcement Agency':{'Kenmore PD':"King County Sheriff's Office / Kenmore Police Department", 'Covington PD':"King County Sheriff's Office / Covington Police Department", 'Maple Valley PD':"King County Sheriff's Office / Maple Valley Police Department", 'Newcastle PD':"King County Sheriff's Office / Newcastle Police Department", 'Metro Transit Police':"King County Sheriff's Office / King County Metro Transit Police", 'Shoreline PD':"King County Sheriff's Office / Shoreline Police Department", 'Woodinville PD':"King County Sheriff's Office / Woodinville Police Department", 'Sammamish PD':"King County Sheriff's Office / Woodinville Police Department"}, 'City':{'Vashon Island':'Unincorporated King County', 'Vashon':'Unincorporated King County', 'White Center':'Unincorporated King County', 'Queen Anne':'Maple Valley'}, 'Defendant Gender':{'M':'Male','F':'Female'}, 'Defendant Race':{'Unknown':'Unknown/Other', 'American Indian':'American Indian or Alaskan Native'}, 'Case Type':{'Infraction Non-traffic':'Infraction Non-Traffic'}},inplace=True) kcso_citations.loc[kcso_citations['Defendant Race'] == 'Hispanic','Defendant Ethnicity'] = 'Hispanic/Latino' kcso_citations.loc[kcso_citations['Defendant Race'] != 'Hispanic','Defendant Ethnicity'] = 'Unknown' kcso_citations['Law Description'] = kcso_citations['Law Description'].str.title() kcso_citations.replace({'Law Description':{'Requirement Re Bicycle Helmet':'Bicycle Helmet Required', 'Bicycle Helmet Violation':'Bicycle Helmet Required', 'Bicycle Helmets':'Bicycle Helmet Required', 'Helmet Required':'Bicycle Helmet Required', 'Fail To Wear Bicycle Helmet':'Bicycle Helmet Required', ' Fail To Wear Bicycle Helmet':'Bicycle Helmet Required', 'Ride Bike W/O Helmet':'Bicycle Helmet Required', 'Require Bicycle Helmet Guardian Of':'Bicycle Helmet Required (Guardian of Minor)', ' Trail Use Violation':'Trail Use Violation', 'Pedestrian On Roadway Unlawfully':'Pedestrian on Roadway Unlawfully'}},inplace=True) kcso_citations.replace({'Law Code':{'9.10.010.A':'King County Health Code 9.10.010A or 9.15.010', '9.15.010':'King County Health Code 9.10.010A or 9.15.010', '9.10.010.B':'King County Health Code 9.10.010B', ' KCC.7.12.295':'King County Code 7.12.295 or 7.12.295.A', 'KE9.05.450':'Kenmore Municipal Code 9.05.450 (partially adopts KCHC Title 9 by reference)', 'BE11.60.090':'Bellevue City Code 11.60.090', ' BE11.60.090':'Bellevue City Code 11.60.090', 'M8.15.030':'Maple Valley Municipal Code 8.15.030 or 8.15.030A', 'MVO.8.15.030A':'Maple Valley Municipal Code 8.15.030 or 8.15.030A', 'M8.15.030A':'Maple Valley Municipal Code 8.15.030 or 8.15.030A', '46.61.250.1':'RCW 46.61.250.1'}},inplace=True) # In[342]: # identify records already present in existing data # extract columns to add to (or check with) existing data existing_indices = [idx for idx, case_num in enumerate(kcso_citations['Case Number']) \ if case_num in kc_citations['Case Number'].values] existing_kcso_citations = kcso_citations.loc[existing_indices][['Case Number','Law Enforcement Agency','City', 'Officer Badge Number','Officer Name']] # merge into rest of data (overwrite existing columns) for idx in existing_kcso_citations.index: for col_name in existing_kcso_citations.keys()[1:]: kc_citations.loc[kc_citations['Case Number'] == existing_kcso_citations.loc[idx,'Case Number'],col_name] = \ existing_kcso_citations.loc[idx,col_name] # In[343]: # identify records not already present in existing data new_indices = [idx for idx, case_num in enumerate(kcso_citations['Case Number']) \ if case_num not in kc_citations['Case Number'].values] new_kcso_citations = kcso_citations.loc[new_indices] # add missing columns new_kcso_citations['Court Name'] = "Unknown (King County Sheriff's Office records)" new_kcso_citations['Disposition'] = 'Unknown' new_kcso_citations['Originating Query'] = 'Helmet violations only (requested KCHC 9.10.010 and 9.15.010)' # add to rest of data kc_citations = pd.merge(kc_citations,new_kcso_citations,how='outer') # #### Load and merge SeaTac Municipal Court records # In[344]: stmc_citations = pd.read_excel(data_dir + '2020-08-06 - SeaTac Municipal Court helmet infraction data - transcribed copy.xlsx', header=0) # redact identifying defendant data and unneeded columns stmc_citations = stmc_citations.drop(columns=['Defendant Name']) # identify records not already present in existing data new_indices = [idx for idx, case_num in enumerate(stmc_citations['Case Number']) \ if case_num not in kc_citations['Case Number'].values] stmc_citations = stmc_citations.loc[new_indices] # display(stmc_citations) # In[345]: # approximate violation date as case file date stmc_citations['Violation Date'] = stmc_citations['Case File Date'].copy() # add missing columns stmc_citations['Court Name'] = 'SeaTac Municipal Court (via PDR)' stmc_citations['Law Enforcement Agency'] = "King County Sheriff's Office / SeaTac Police Department" stmc_citations['Case Type'] = 'Infraction' stmc_citations['Disposition'] = 'Unknown' stmc_citations['Defendant Gender'] = 'Unknown' stmc_citations['Defendant Race'] = 'Unknown/Other' stmc_citations['Defendant Ethnicity'] = 'Unknown' stmc_citations['Originating Query'] = 'Helmet violations only (requested KCHC 9.10.010 and 9.15.010, STMC 9.30.020 and 9.30.060)' # add to rest of data kc_citations = pd.merge(kc_citations,stmc_citations,how='outer') # #### Load and merge Federal Way Municipal Court records # In[346]: fwmc_citations = pd.read_excel(data_dir + '2020-08-05 - Federal Way Municipal Court helmet infraction data (2010-06-14 to 2014-06-08).xlsx', header=0,parse_dates=[0]) # match missing citation numbers where possible; otherwise, assign unique citation numbers with "Unknown_FWMC" in their names unknown_counter = 1 for idx in fwmc_citations.index: if fwmc_citations.loc[idx,'Citation Number'] == 'Unknown': citation_match = logical_and.reduce((kc_citations['Court Name'] == 'Federal Way Municipal Court', kc_citations['Violation Date'] == fwmc_citations.loc[idx,'Date Issued'], kc_citations['Defendant Name'].str.split(',').str[0] \ == fwmc_citations.loc[idx,'Defendant'].split(',')[0])) if sum(citation_match) > 0: fwmc_citations.loc[idx,'Citation Number'] = kc_citations.loc[where(citation_match),'Case Number'].values else: fwmc_citations.loc[idx,'Citation Number'] = f'Unknown_FWMC_Assigned_{unknown_counter:03}' unknown_counter += 1 # redact identifying defendant data and unneeded columns fwmc_citations = fwmc_citations.drop(columns=['Defendant']) # also redact identifying defendant data from AOC records kc_citations = kc_citations.drop(columns=['Defendant Name']) # identify records not already present in existing data new_indices = [idx for idx, case_num in enumerate(fwmc_citations['Citation Number']) \ if case_num not in kc_citations['Case Number'].values] fwmc_citations = fwmc_citations.loc[new_indices] # display(fwmc_citations) # In[347]: # drop columns with missing/poor/unneeded data fwmc_citations = fwmc_citations.drop(columns=['Race/Gender','Citation Location','Officer','Unnamed: 8']) # standardize column names fwmc_citations = fwmc_citations.rename(columns={'Date Issued':'Violation Date', 'Offense Description':'Law Description', 'Citation Number':'Case Number', 'Badge #':'Officer Badge Number'}) # standardize data fwmc_citations.replace({'Law Description':{'RIDING BICYCLE W/O HELMET':'Bicycle Helmet Required'}},inplace=True) # add missing columns fwmc_citations['Court Name'] = 'Federal Way Municipal Court (via PDR)' fwmc_citations['Law Enforcement Agency'] = 'Federal Way Police Department' fwmc_citations['Case Type'] = 'Infraction' fwmc_citations['Law Code'] = 'Federal Way Revised Code 8.25.060' fwmc_citations['Disposition'] = 'Unknown' fwmc_citations['Defendant Gender'] = 'Unknown' fwmc_citations['Defendant Race'] = 'Unknown/Other' fwmc_citations['Defendant Ethnicity'] = 'Unknown' fwmc_citations['Originating Query'] = 'Helmet violations only (requested FWRC 8.25.060)' # add to rest of data kc_citations = pd.merge(kc_citations,fwmc_citations,how='outer') # #### Load and merge Kent Municipal Court records # In[348]: kmc_citations = pd.read_excel(data_dir + '2020-07-28 - Kent Municipal Court helmet infraction data (2007-10-30 to 2020-06-15).xlsx', header=4,parse_dates=[0,6,12]) # nullify invalid date entries kmc_citations.loc[kmc_citations['Dispo Date'] < datetime(1990,1,1),'Dispo Date'] = NaN # fill duplicate rows with valid data from above or below, then remove duplicates kmc_citations['Official Officer Badge Number'].fillna(method='ffill',axis=0,limit=1,inplace=True) kmc_citations['Person Race'].fillna(method='bfill',axis=0,limit=1,inplace=True) kmc_citations['Person Gender'].fillna(method='bfill',axis=0,limit=1,inplace=True) kmc_citations = kmc_citations.drop_duplicates().reset_index(drop=True) # redact identifying defendant data and unneeded columns kmc_citations.drop(columns=['Case Def Name','Unnamed: 2','Unnamed: 9','Unnamed: 11'],inplace=True) # identify records not already present in existing data new_indices = [idx for idx, case_num in enumerate(kmc_citations['Case #']) \ if case_num not in kc_citations['Case Number'].values] kmc_citations = kmc_citations.loc[new_indices] # In[349]: # standardize column names kmc_citations = kmc_citations.rename(columns={'Filed Date':'Case File Date', 'Case #':'Case Number', 'Type':'Case Type', 'LEA':'Law Enforcement Agency', 'Official Officer Badge Number':'Officer Badge Number', 'Viol Date':'Violation Date', 'Person Race':'Defendant Race', 'Person Gender':'Defendant Gender', 'Dispo Date':'Disposition Date', 'Dispo':'Disposition'}) # standardize data kmc_citations.replace({'Case Type':{'IN':'Infraction', 'IT':'Infraction Traffic'}, 'Law Enforcement Agency':{'KNP':'Kent Police Department'}, 'Defendant Race':{'Asian':'Asian or Pacific Islander'}, 'Disposition':{'C':'Committed', 'P':'Paid', 'D':'Dismissed'}},inplace=True) # add missing columns kmc_citations['Court Name'] = 'Kent Municipal Court (via PDR)' kmc_citations['Law Code'] = 'Kent City Code 9.41.030' kmc_citations['Law Description'] = 'Bicycle Helmet Required' kmc_citations['Defendant Ethnicity'] = 'Unknown' kmc_citations['Originating Query'] = 'Helmet violations only (requested KCHC 9.10.010 and 9.15.010, KCC 9.41.030 and 9.41.070)' # add to rest of data kc_citations = pd.merge(kc_citations,kmc_citations,how='outer') # #### In all previously loaded records, split officer name into first/middle/last # In[350]: for row_idx in kc_citations.index: ofc_name = kc_citations.loc[row_idx,'Officer Name'] if type(ofc_name) == float: # NaN value pass else: ofc_name = ofc_name.replace('.',' ') if ',' in ofc_name: # assume single comma and order: last, first (middle) last, first_middle = ofc_name.split(',') first_middle = first_middle.split() if len(first_middle) == 1: first = first_middle[0] middle = NaN elif len(first_middle) == 2: if len(first_middle[1]) == 1: first, middle = first_middle else: first = first_middle[0] + ' ' + first_middle[1] middle = NaN else: print('ERROR: found first + middle name with over 2 components') else: # assume order: first (middle) last components = ofc_name.split() if len(components) == 2: first, last = components middle = NaN elif len(components) == 3: if len(components[1]) == 1: first, middle, last = components else: # ambiguous if 2nd component is part of first or last name; assume two-part first name first = components[0] + ' ' + components[1] last = components[2] middle = NaN elif len(components) == 4: if len(components[1]) == 1: first, middle, last1, last2 = components last = last1 + ' ' + last2 elif len(components[2]) == 1: first1, first2, middle, last = components first = first1 + ' ' + first2 else: # ambiguous first1, first2, last1, last2 = components first = first1 + ' ' + first2 last = last1 + ' ' + last2 middle = NaN else: print('ERROR: found name with 1 component or over 4 components') kc_citations.loc[row_idx,'Officer First Name'] = first kc_citations.loc[row_idx,'Officer Middle Name'] = middle kc_citations.loc[row_idx,'Officer Last Name'] = last # remove original, inconsistently-formatted name column kc_citations.drop(columns=['Officer Name'],inplace=True) # #### Load and merge Seattle Municipal Court records (batches #1 and #2) # In[351]: # load Seattle Municipal Court infraction records from 2003-2016 (incomplete due to purge schedule) smc_citations_2003_2016 = pd.read_excel(data_dir + '2020-07-20 - Seattle Municipal Court bicycle violations (2003-01-01 to 2016-12-31).xlsx', index_col=None,parse_dates=[0,1]) # load Seattle Municipal Court infraction records from 2017-2020 smc_citations_2017_2020 = pd.read_excel(data_dir + '2020-07-16 - Seattle Municipal Court bicycle violations (2017-01-01 to 2020-06-30).xlsx', index_col=None,parse_dates=[0,1]) # fix misnamed columns in records from 2003-2016 temp_officer_number = smc_citations_2003_2016['Officer First Name'].copy() temp_first_name = smc_citations_2003_2016['Officer Middle Initial'].copy() temp_middle_initial = smc_citations_2003_2016['Officer Last Name'].copy() temp_last_name = smc_citations_2003_2016['Officer Number'].copy() smc_citations_2003_2016['Officer First Name'] = temp_first_name smc_citations_2003_2016['Officer Middle Initial'] = temp_middle_initial smc_citations_2003_2016['Officer Last Name'] = temp_last_name smc_citations_2003_2016['Officer Number'] = temp_officer_number # merge old and new data smc_citations = pd.concat([smc_citations_2003_2016,smc_citations_2017_2020]).copy().reset_index(drop=True) # remove duplicate entries (same defendant, same violation) # but retain different violations (e.g. helmet and light violations) issued to same defendant under same citation number # NOTE: this will impact analyses of officers, since it seems most (if not all) duplicates list two different officers smc_citations = smc_citations.drop_duplicates(subset=['Citation Number','Violation Code'],keep='first').reset_index(drop=True) # drop unneeded columns smc_citations.drop(columns=['Violation Location'],inplace=True) # load Seattle Municipal Court infraction records from 2003-2021 # (including charge amounts and status, plus more defendant data) smc_batch2_pt1 = pd.read_excel(data_dir + '2021-01-28 - Seattle Municipal Court bicycle violations (2003-01-01 to 2016-12-31).xlsx', index_col=None,parse_dates=[12,13]) smc_batch2_pt2 = pd.read_excel(data_dir + '2021-02-16 - Seattle Municipal Court bicycle violations (2017-01-01 to 2021-02-16).xlsx', index_col=None,parse_dates=[12,13]) smc_citations_batch2 = pd.concat([smc_batch2_pt1,smc_batch2_pt2]).copy().reset_index().drop(columns='index') smc_citations_batch2.rename(columns={'Race Code':'Defendant Race','Gender Code':'Defendant Gender'},inplace=True) # drop unneeded columns and standardize column names smc_citations_batch2.drop(columns=['Case Number','City','State','Zip Code', 'Defendant First Name','Defendant Last Name', 'Violation Location','In Collection','Obligation Number'],inplace=True) # remove duplicate entries (same defendant, same violation) – SEE DETAILS ABOVE IN SIMILAR LINE smc_citations_batch2 = smc_citations_batch2.drop_duplicates(subset=['Citation Number','Violation Code'],keep='first').reset_index(drop=True) # fix financial charge information, based on information from Gary Ireland, SMC PIO: # "Instances where the Obligation Amount Outstanding is either 0 or less than the difference # between the Assessed and Paid amounts, is caused by waiver or write-off by a judge for # one reason or another." # this appears to be accurate for all citations issued ("Violation Date") in 2011 onwards; # however, inspection of the data shows nearly all records prior to 2011 have a zero value for # "Obligation Amount Outstanding," which is highly suspicious. thus, we infer: # - prior to 2011: AR Ordered Amount = "Amount Assessed" # AR Adjusted Amount = NaN (no data available) # - from 2011 onwards: AR Ordered Amount = "Amount Assessed" (NOTE: this doesn't seem to include late/default penalties) # AR Adjusted Amount = MIN("Amount Assessed", "Amount Paid"+"Amount Outstanding") smc_citations_batch2.loc[smc_citations_batch2['Violation Date'].dt.year < 2011,'AR Adjusted Amount'] = NaN smc_citations_batch2['TEMP - paid plus outstanding'] \ = smc_citations_batch2['Obligation Amount Paid'] + \ smc_citations_batch2['Obligation Amount Outstanding'] smc_citations_batch2['TEMP - inferred adjusted amount'] \ = smc_citations_batch2[['Obligation Amount Assessed','TEMP - paid plus outstanding']].min(axis=1) smc_citations_batch2.loc[smc_citations_batch2['Violation Date'].dt.year >= 2011,'AR Adjusted Amount'] \ = smc_citations_batch2.loc[smc_citations_batch2['Violation Date'].dt.year >= 2011,'TEMP - inferred adjusted amount'] smc_citations_batch2.drop(columns=['TEMP - paid plus outstanding', 'TEMP - inferred adjusted amount'],inplace=True) smc_citations_batch2 \ = smc_citations_batch2.rename(columns={'Obligation Amount Assessed':'AR Ordered Amount', 'Obligation Amount Paid':'AR Paid Amount', 'Obligation Amount Outstanding':'AR Due Amount'}) # merge two batches of SMC records # NOTE: see below for "fix" to merge duplicate rows with slightly different missing (NaN) columns smc_citations = pd.merge(smc_citations,smc_citations_batch2,how='outer') # merge date and time columns smc_citations['Violation Datetime'] = smc_citations['Violation Date'] \ + pd.to_timedelta(floor(smc_citations['Violation Time']/100),'h') \ + pd.to_timedelta(smc_citations['Violation Time'] - 100*floor(smc_citations['Violation Time']/100),'m') # drop unneeded columns smc_citations.drop(columns=['Violation Time'],inplace=True) # handle entries missing Defendant Race information smc_citations['Defendant Race'] = smc_citations['Defendant Race'].fillna('U') # clean up officer badge numbers smc_citations['Officer Number'] = smc_citations['Officer Number'].replace(9999,NaN) # fix previous merge by merging duplicate rows with slightly different missing (NaN) columns smc_citations = smc_citations.groupby(['Citation Number','Violation Code'])\ .apply(lambda x: x.fillna(method='ffill').\ fillna(method='bfill').\ drop_duplicates()).\ reset_index(drop=True) # display(smc_citations) # In[352]: # standardize column names smc_citations = smc_citations.rename(columns={'Filing Date':'Case File Date', 'Citation Number':'Case Number', 'Violation Code':'Law Code', 'Violation Description':'Law Description', 'Officer Middle Initial':'Officer Middle Name', 'Officer Number':'Officer Badge Number'}) # standardize data smc_citations['Law Description'] = smc_citations['Law Description'].str.title() smc_citations['Officer First Name'] = smc_citations['Officer First Name'].str.title() smc_citations['Officer Last Name'] = smc_citations['Officer Last Name'].str.title() smc_citations['Law Code'] = [old_str.replace('SMC','Seattle Municipal Code ') if old_str[:3] == 'SMC' else old_str \ for old_str in smc_citations['Law Code']] smc_citations['Law Code'] = ['Seattle Municipal Code ' + old_str if old_str[:5] == '11.44' else old_str \ for old_str in smc_citations['Law Code']] smc_citations.replace({'Law Code':{'R9.15.010':'King County Health Code 9.10.010A or 9.15.010', 'KCHC9.10.010':'King County Health Code 9.10.010A or 9.15.010', # assuming section A 'H9.10.010':'King County Health Code 9.10.010A or 9.15.010'}, # assuming section A 'Law Description':{'Rights And Duties Of Riders':'Bicycle Rider Rights/Duties Violation', 'Bike Rider Rights/Duties Violation':'Bicycle Rider Rights/Duties Violation', 'Bike, Lamps/Reflectors':'Bicycle No/Improper Lamps/Reflectors', 'Bike, R/W Xwalk':'Bicycle Yield Right of Way in Crosswalk', 'Bike R/W In Crosswalk':'Bicycle Yield Right of Way in Crosswalk', 'Bike, Ride On Road':'Bicycle Riding on Roadways Rules Violation', 'Bike Riding On Road Rules Viol':'Bicycle Riding on Roadways Rules Violation', 'Bike No/Improper Lamps/Reflectors':'Bicycle No/Improper Lamps/Reflectors', 'Bike Unsafe Pass On Right':'Bicycle Unsafe Pass on Right', 'Bike Yield R/W On Public Path':'Bicycle Riding on Sidewalk/Public Path Rules Violation', 'Bike, Yield R/W':'Bicycle Riding on Sidewalk/Public Path Rules Violation', 'Bike, Pass, Right':'Bicycle Unsafe Pass on Right', 'Bike No/Improper Hand Signals':'Bicycle No/Improper Hand Signals', 'Bike, Hand Signals':'Bicycle No/Improper Hand Signals', 'Bike, No Brakes':'Bicycle No/Improper Brakes', 'Bike No/Improper Brakes':'Bicycle No/Improper Brakes', 'Bike, Control':'Bicycle Control One Hand Minimum Required', "Bike Control One Hand Min Req'D":'Bicycle Control One Hand Minimum Required', 'Bike, Clinging To Veh':'Bicycle Clinging to Vehicle', 'Bike Clinging/Attaching To Vehicle':'Bicycle Clinging to Vehicle', 'Bike, Excess Passengers':'Bicycle Excess Passengers', 'Bike, Fail To Ride On Seat':'Bicycle Failure to Ride on Seat', 'Bike, Responsibility - Child/Parent':'Bicycle Parent of Child Responsibility', 'Bike Ride >2 Abreast Road/Sidewalk':'Bicycle Riding More Than 2 Abreast'}, 'Defendant Race':{'W':'White','B':'Black','A':'Asian or Pacific Islander', 'I':'American Indian or Alaskan Native','U':'Unknown/Other'}, 'Defendant Gender':{'M':'Male','F':'Female','U':'Unknown'}},inplace=True) # add missing columns smc_citations['Court Name'] = 'Seattle Municipal Court (via PDR)' smc_citations['Case Type'] = 'Infraction' smc_citations['Law Enforcement Agency'] = 'Seattle Police Department' smc_citations['Disposition'] = 'Unknown' smc_citations['Defendant Ethnicity'] = 'Unknown' smc_citations['Originating Query'] = 'All bike violations (requested KCHC 9.10.010 and 9.15.010, SMC 11.44)' # for third time, remove duplicate entries (same defendant, same violation) – SEE DETAILS ABOVE IN SIMILAR LINE # this is necessary because "Violation Code" column was not yet homogenized above smc_citations = smc_citations.drop_duplicates(subset=['Case Number','Law Code'],keep='first').reset_index(drop=True) # identify records not already present in existing data # (NOTE: for SMC data, all records are not already present, so this is a formality) new_indices = [idx for idx, case_num in enumerate(smc_citations['Case Number']) \ if case_num not in kc_citations['Case Number'].values] smc_citations = smc_citations.loc[new_indices] # add to rest of data kc_citations = pd.merge(kc_citations,smc_citations,how='outer') # #### Assign towns/cities/jurisdictions for each record # In[353]: def assign_city(city_name,lea_is=None,lea_contains=None,law_contains=None,court_contains=None): # boolean mask for rows without a town/city/jurisdiction assignment city_unassigned_mask = kc_citations['City'].apply(lambda x: not isinstance(x, str)) # assign town/city/jurisdiction based on LEA and other criteria if lea_is is not None: lea_mask = kc_citations['Law Enforcement Agency'] == lea_is elif lea_contains is not None: lea_mask = kc_citations['Law Enforcement Agency'].str.contains(lea_contains) if law_contains is not None: assign_mask = logical_and.reduce((city_unassigned_mask,lea_mask, kc_citations['Law Code'].str.contains(law_contains))) else: assign_mask = logical_and(city_unassigned_mask,lea_mask) if court_contains is not None: assign_mask = logical_and(assign_mask,kc_citations['Court Name'].str.contains(court_contains)) kc_citations.loc[assign_mask,'City'] = city_name # In[354]: # near-certain assignments made based on law enforcement agency, court, and local code referenced assign_city('Pacific',lea_is='Algona Police Department',law_contains='Pacific Municipal Code') assign_city('Algona',lea_is='Algona Police Department') assign_city('Auburn',lea_is='Auburn Police Department') assign_city('Bellevue',lea_is='Bellevue Police Department') assign_city('Black Diamond',lea_is='Black Diamond Police Department') assign_city('Bothell',lea_is='Bothell Police Department') assign_city('Burien',lea_contains='Burien Police Department') assign_city('Covington',lea_contains='Covington Police Department') assign_city('Des Moines',lea_is='Des Moines Police Department') assign_city('Enumclaw',lea_is='Enumclaw Police Department') assign_city('Federal Way',lea_is='Federal Way Police Department') assign_city('Issaquah',lea_is='Issaquah Police Department') assign_city('Kent',lea_is='Kent Police Department') assign_city('Kirkland',lea_is='Kirkland Police Department') assign_city('Lake Forest Park',lea_is='Lake Forest Park Police Department') assign_city('Mercer Island',lea_is='Mercer Island Police Department') assign_city('Pacific',lea_is='Pacific Police Department') assign_city('Redmond',lea_is='Redmond Police Department') assign_city('Renton',lea_is='Renton Police Department') assign_city('Sammamish',lea_contains='Sammamish Police Department') assign_city('SeaTac',lea_contains='SeaTac Police Department') assign_city('Seattle',lea_is='Seattle Police Department') assign_city('Seattle',lea_is='University Of Washington Police Department') assign_city('Seattle',lea_is='Port of Seattle Police Department') assign_city('Tukwila',lea_is='Tukwila Police Department') assign_city('Kirkland',lea_contains='Woodinville Police Department',court_contains='Kirkland Municipal Court') assign_city('Woodinville',lea_contains='Woodinville Police Department') assign_city('North Bend',lea_is="King County Sheriff's Office / North Bend Police Department") assign_city('Issaquah',lea_is='Snoqualmie/North Bend Police Department',law_contains='Issaquah Municipal Code') assign_city('Snoqualmie',lea_is='Snoqualmie/North Bend Police Department',law_contains='Snoqualmie Municipal Code') assign_city('Kenmore',lea_contains='Kenmore Police Department') assign_city('Shoreline',lea_contains='Shoreline Police Department') assign_city('Maple Valley',lea_contains='Maple Valley Police Department') assign_city('Maple Valley',lea_is="King County Sheriff's Office",law_contains='Maple Valley Municipal Code') # ambiguous assignments assign_city('Snoqualmie',lea_is='Snoqualmie/North Bend Police Department') # 6 citations (could be Snoqualmie or North Bend) assign_city('Unincorporated King County',lea_is='Washington State Patrol') # 10 citations without any city ties (all within KCDC jurisdiction) assign_city('Unincorporated King County',lea_is="King County Sheriff's Office") # 81 citations without any city ties (all within KCDC jurisdiction) # [to save] lookup code (similar to count_values but does not exclude NaN values) # kc_citations.loc[kc_citations['Law Enforcement Agency'].str.contains('Maple Valley')]\ # .groupby(['Court Name','Law Enforcement Agency','Law Code','City'],dropna=False,as_index=False).size() # #### Export compiled, homogenized, quality-controlled citation records # In[355]: kc_citations.to_excel(data_dir + '2021-10-18 - compiled King County bike citation records.xlsx')