# Notebook setup: display options, plotting style, and project-local imports.
import pandas as pd
# show up to 130 rows / 130 columns when a DataFrame is displayed
pd.options.display.max_rows = 130
pd.options.display.max_columns = 130
import matplotlib.pyplot as plt
# IPython magic: render matplotlib figures inline in the notebook
% matplotlib inline
# apply two style sheets in sequence: 'seaborn-poster' first, then 'ggplot'
plt.style.use('seaborn-poster')
plt.style.use('ggplot')
# project module, referred to as `an` in the cells below
import analysis as an
import sys
# put ../2-Data/ at the front of the module search path
# (presumably where databuild.py lives -- verify against the repo layout)
sys.path.insert(0, '../2-Data/')
import databuild as db
/usr/local/lib/python2.7/dist-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead. from pandas.core import datetools
The MV104 crash database is compiled by DMV into three tables:
The databuild.py script reads in the tables and organizes them into a person level table for pedestrians or bicyclists involved in 1-vehicle crashes.
The script also reads in the linked hospital-crash data from DOHMH and adds the hospital info (e.g. the person's b-ISS score) onto the ped file.
The resulting dataframe (ped) is what the rest of the analysis in this notebook uses. It only includes injured people who were able to be linked to hospital data by DOHMH.
# read in DMV data into 3 tables
crash,ind,veh = db.readDMV()
# reorganize into pedestrian/bicyclist 1-veh crashes
ped = db.buildTablesDMV(crash,ind,veh)
#read in DMV-SPARCS linked data
linked = db.readLinked()
# included biss data from linked onto ped (dropping anything not in linked)
ped = db.mergeBiss(ped,linked)
#format and categorize variables
ped = db.formatVars(ped)
print 'linked ped',ped.shape
/usr/local/lib/python2.7/dist-packages/pandas/core/computation/check.py:17: UserWarning: The installed version of numexpr 2.4.3 is not supported in pandas and will be not be used The minimum supported version is 2.4.6 ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)
full crash table (522108, 26) full person table (1502797, 22) full vehicle table (1092922, 20)
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py:2822: DtypeWarning: Columns (1,9,20,25,35,48,54,58,63,65,85,89,102,126,128,129,138) have mixed types. Specify dtype option on import or set low_memory=False. if self.run_code(code, result):
pedestrians/bicyclists (police reported) (single vehicle) (95292, 80) linked (76763, 131) linked after dropping no police reports (69657, 131) linked ped (17624, 106)
Patients with a derived Injury Severity Score (b-ISS) of 9 or greater are defined as severe cases. While the bar is generally set at ISS 16 or greater (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3217501/ ), we chose a lower threshold to take into account injuries such as a broken leg (ISS = 9), which may be low on a threat-to-life scale but can have a significant impact on a person’s ability to work and get around in the near term.
We could have chosen to do an analysis without assigning a threshold (for instance using an AUC measure), however most implementations would necessitate defining a threshold at some point.
We use the Severity Ratio to understand which crash attributes are associated with a severe outcome. The Severity Ratio (SR) is defined as the probability of a severe injury outcome given an attribute was present divided by the probability of a severe injury outcome (irrespective of whether the attribute is present or not). (Note: this is slightly different from the usual definition of Risk Ratio where the denominator is probability of a severe outcome given the attribute is NOT present).
$$ SR = \frac{P( severe \space | \space attribute )}{P(severe)} $$

If SR is much greater than 1, then crashes with that attribute present have a higher likelihood of a severe outcome. If SR is close to 1, then that attribute is no more indicative of severity than a random crash.
The denominator of SR is calculated in the next cell (around .10 for pedestrians/bicyclists)
# baseline severity -- this is the denominator of the SR
pedNumSev = ped[ped.biss_severity_9=='severe'].shape[0]
pedNum = ped.shape[0]
pedPctSev = pedNumSev/float(pedNum)*100
print 'percent of ped with severe outcomes',pedPctSev
percent of ped with severe outcomes 9.69133000454
# Severity-ratio table for the full `ped` table, displayed with the highest
# SeverityRatio first (the sorted copy is only displayed, not stored).
factorSev = an.sratio(data=ped)
factorSev.sort_values(by='SeverityRatio', ascending=False)
SevereCount | AllCount | SeverePct | SeverityRatio | |
---|---|---|---|---|
AgeDecade : 90.0 | 21 | 44 | 0.477273 | 4.924739 |
InjuryStatus : Not Conscious States | 417 | 935 | 0.445989 | 4.601941 |
InjuryType : Severe Bleeding | 234 | 532 | 0.439850 | 4.538589 |
InjuryType : Internal | 90 | 269 | 0.334572 | 3.452287 |
InjuryType : Amputation | 9 | 27 | 0.333333 | 3.439500 |
AgeDecade : 80.0 | 110 | 373 | 0.294906 | 3.042990 |
InjuryType : Concussion | 72 | 263 | 0.273764 | 2.824837 |
InjuryLoc : Eye | 6 | 23 | 0.260870 | 2.691783 |
Age70 : age >= 70 | 308 | 1224 | 0.251634 | 2.596486 |
OtherVehTypeVIN : Truck | 34 | 141 | 0.241135 | 2.488149 |
InjuryLoc : Head | 610 | 2662 | 0.229151 | 2.364495 |
AgeDecade : 70.0 | 177 | 807 | 0.219331 | 2.263166 |
OtherVehType : Motorcycle | 21 | 96 | 0.218750 | 2.257172 |
OtherVehTypeVIN : Motorcycle | 16 | 74 | 0.216216 | 2.231027 |
InjuryType : Fracture-Dislocation | 181 | 846 | 0.213948 | 2.207623 |
TimeOfDay : 3am-6am | 96 | 499 | 0.192385 | 1.985122 |
OtherVehType : Truck | 59 | 325 | 0.181538 | 1.873205 |
AgeDecade : 60.0 | 246 | 1426 | 0.172511 | 1.780050 |
OtherVehType : Bus | 53 | 332 | 0.159639 | 1.647231 |
PedAction : Crossing, Against Signal | 344 | 2247 | 0.153093 | 1.579690 |
InjuryLoc : Entire Body | 325 | 2166 | 0.150046 | 1.548252 |
TimeOfDay : midnight-3am | 99 | 661 | 0.149773 | 1.545434 |
InjuryLoc : Chest | 25 | 176 | 0.142045 | 1.465696 |
InjuryType : Minor Bleeding | 220 | 1559 | 0.141116 | 1.456107 |
OtherVehTypeVIN : Bus | 25 | 180 | 0.138889 | 1.433125 |
InjuryType : unknown | 73 | 570 | 0.128070 | 1.321492 |
OtherVehType : Van | 81 | 636 | 0.127358 | 1.314149 |
DriverAgeDecade : 90.0 | 2 | 16 | 0.125000 | 1.289813 |
DriverAgeDecade : 80.0 | 27 | 219 | 0.123288 | 1.272144 |
OtherVehTypeVIN : Van | 122 | 991 | 0.123108 | 1.270290 |
OtherVehAction : Going Straight Ahead | 1008 | 8191 | 0.123062 | 1.269814 |
InjuryType : Moderate/Severe Burn | 5 | 41 | 0.121951 | 1.258354 |
OtherVehTypeVIN : Pickup | 27 | 223 | 0.121076 | 1.249325 |
AgeDecade : 50.0 | 249 | 2081 | 0.119654 | 1.234650 |
Lighting : Dark-Road | 693 | 5815 | 0.119175 | 1.229703 |
PedAction : Crossing, No Signal or Crosswalk | 320 | 2743 | 0.116661 | 1.203762 |
TimeOfDay : 9pm-midnight | 245 | 2112 | 0.116004 | 1.196985 |
DriverAge70 : age >= 70 | 90 | 794 | 0.113350 | 1.169603 |
Lighting : unknown | 20 | 177 | 0.112994 | 1.165932 |
RoadSurface : unknown | 20 | 178 | 0.112360 | 1.159382 |
PedAction : unknown | 203 | 1844 | 0.110087 | 1.135930 |
OtherVehTypeVIN : SUV | 240 | 2193 | 0.109439 | 1.129248 |
DriverAgeDecade : 70.0 | 61 | 559 | 0.109123 | 1.125990 |
DriverAgeDecade : 40.0 | 339 | 3150 | 0.107619 | 1.110467 |
OtherVehType : Pickup | 26 | 243 | 0.106996 | 1.104037 |
InjuryLoc : Abdomen-Pelvis | 33 | 309 | 0.106796 | 1.101976 |
DriverSex : male | 1125 | 10660 | 0.105535 | 1.088960 |
DriverAgeDecade : 60.0 | 169 | 1618 | 0.104450 | 1.077767 |
Sex : male | 1027 | 9888 | 0.103863 | 1.071713 |
OtherVehType : Suburban | 549 | 5286 | 0.103859 | 1.071672 |
DriverAgeDecade : 30.0 | 306 | 2964 | 0.103239 | 1.065270 |
Role : pedestrian | 1448 | 14133 | 0.102455 | 1.057185 |
OtherVehAction : unknown | 63 | 620 | 0.101613 | 1.048493 |
TrafficControl : None | 601 | 5939 | 0.101195 | 1.044186 |
DriverAge70 : age < 70 | 1340 | 13259 | 0.101063 | 1.042823 |
TrafficControl : Traffic signal | 975 | 9813 | 0.099358 | 1.025226 |
Eject : unknown | 1526 | 15405 | 0.099059 | 1.022138 |
PedLoc : at intersection | 373 | 3767 | 0.099018 | 1.021715 |
Weather : Clear | 1248 | 12716 | 0.098144 | 1.012700 |
Weather : unknown | 20 | 204 | 0.098039 | 1.011618 |
RoadSurface : Dry | 1364 | 13963 | 0.097687 | 1.007981 |
InjuryLoc : unknown | 71 | 727 | 0.097662 | 1.007722 |
Lighting : Dawn/Dusk | 112 | 1151 | 0.097307 | 1.004059 |
OtherVehTypeVIN : Minivan | 31 | 319 | 0.097179 | 1.002738 |
TimeOfDay : 6am-9am | 181 | 1874 | 0.096585 | 0.996611 |
PedLoc : unknown | 1209 | 12536 | 0.096442 | 0.995139 |
TrafficControl : unknown | 52 | 542 | 0.095941 | 0.989967 |
DriverAgeDecade : 20.0 | 236 | 2468 | 0.095624 | 0.986696 |
InjuryLoc : Hip-Upper Leg | 159 | 1663 | 0.095610 | 0.986555 |
PedLoc : not at intersection | 126 | 1321 | 0.095382 | 0.984202 |
DriverAgeDecade : 50.0 | 270 | 2832 | 0.095339 | 0.983755 |
Eject : ejected | 100 | 1062 | 0.094162 | 0.971610 |
Weather : Percipitation | 248 | 2645 | 0.093762 | 0.967481 |
PedAction : Other | 210 | 2244 | 0.093583 | 0.965635 |
Weather : Cloudy | 192 | 2059 | 0.093249 | 0.962191 |
RoadSurface : Not Dry | 324 | 3483 | 0.093023 | 0.959861 |
OtherVehTypeVIN : unknown | 525 | 5719 | 0.091799 | 0.947231 |
InjuryLoc : Face | 46 | 504 | 0.091270 | 0.941768 |
TimeOfDay : 6pm-9pm | 355 | 3896 | 0.091119 | 0.940213 |
TimeOfDay : noon-3pm | 234 | 2610 | 0.089655 | 0.925107 |
DriverSex : female | 306 | 3424 | 0.089369 | 0.922156 |
OtherVehTypeVIN : Car | 688 | 7784 | 0.088386 | 0.912016 |
DriverAgeDecade : 10.0 | 20 | 227 | 0.088106 | 0.909119 |
Sex : female | 681 | 7736 | 0.088030 | 0.908338 |
InjuryType : Minor Burn | 5 | 57 | 0.087719 | 0.905132 |
OtherVehType : unknown | 255 | 2953 | 0.086353 | 0.891032 |
OtherVehType : Car | 664 | 7753 | 0.085644 | 0.883720 |
Age70 : age < 70 | 1400 | 16400 | 0.085366 | 0.880848 |
Lighting : Daylight | 883 | 10481 | 0.084248 | 0.869310 |
TimeOfDay : 3pm-6pm | 319 | 3808 | 0.083771 | 0.864391 |
TimeOfDay : 9am-noon | 176 | 2118 | 0.083097 | 0.857439 |
AgeDecade : 0.0 | 88 | 1073 | 0.082013 | 0.846252 |
OtherVehAction : Backing | 88 | 1078 | 0.081633 | 0.842327 |
InjuryLoc : Neck | 40 | 493 | 0.081136 | 0.837201 |
OtherVehAction : Other | 28 | 349 | 0.080229 | 0.827845 |
DriverSex : unknown | 277 | 3540 | 0.078249 | 0.807408 |
DriverAge70 : unknown | 278 | 3571 | 0.077849 | 0.803289 |
DriverAgeDecade : unknown | 278 | 3571 | 0.077849 | 0.803289 |
InjuryStatus : Conscious states | 1245 | 16067 | 0.077488 | 0.799560 |
AgeDecade : 40.0 | 165 | 2136 | 0.077247 | 0.797075 |
PedAction : Crossing, No Signal, Marked Crosswalk | 99 | 1286 | 0.076983 | 0.794348 |
PedAction : Crossing, With Signal | 437 | 5698 | 0.076694 | 0.791363 |
OtherVehAction : Making Left Turn | 370 | 4939 | 0.074914 | 0.773000 |
Role : bicyclist | 260 | 3491 | 0.074477 | 0.768493 |
InjuryStatus : unknown | 46 | 622 | 0.073955 | 0.763105 |
TrafficControl : Other | 13 | 179 | 0.072626 | 0.749388 |
Eject : not ejected | 82 | 1157 | 0.070873 | 0.731303 |
InjuryType : Abrasion | 82 | 1171 | 0.070026 | 0.722559 |
AgeDecade : 20.0 | 277 | 3996 | 0.069319 | 0.715271 |
AgeDecade : 30.0 | 162 | 2455 | 0.065988 | 0.680895 |
AgeDecade : 10.0 | 213 | 3233 | 0.065883 | 0.679815 |
TimeOfDay : unknown | 3 | 46 | 0.065217 | 0.672946 |
OtherVehAction : Making Right Turn | 101 | 1626 | 0.062116 | 0.640940 |
InjuryType : Contusion-Bruise | 88 | 1421 | 0.061928 | 0.639006 |
InjuryType : Complaint of Pain | 548 | 8983 | 0.061004 | 0.629471 |
OtherVehAction : Stopping Starting | 50 | 821 | 0.060901 | 0.628411 |
PedAction : Along Highway | 95 | 1562 | 0.060819 | 0.627566 |
InjuryType : Whiplash | 2 | 34 | 0.058824 | 0.606971 |
TrafficControl : Stop sign | 67 | 1151 | 0.058210 | 0.600643 |
InjuryLoc : Back | 63 | 1138 | 0.055360 | 0.571235 |
InjuryLoc : Shoulder-Upper Arm | 63 | 1140 | 0.055263 | 0.570233 |
InjuryType : None Visible | 99 | 1851 | 0.053485 | 0.551881 |
InjuryLoc : Knee-Lower Leg-Foot | 221 | 5159 | 0.042838 | 0.442021 |
InjuryLoc : Elbow-Lower Arm-Hand | 46 | 1464 | 0.031421 | 0.324215 |
# Call bootstrapSR on `ped` with N=100 and hand the result to plotSR together
# with the severity-ratio table computed above.
samples = an.bootstrapSR(ped, N=100)
an.plotSR(factorSev, samples)
#clean up the graph, get rid of age by decades and one of the two redundant vehicle type columns
# drop age decades (every column whose name contains 'Decade')
ped_df = ped.drop(ped.filter(like='Decade').columns,axis=1)
# drop one of the two vehicle-type columns (they are redundant).
# NOTE(review): the column removed here is f_OtherVehType, so a VIN-derived
# vehicle-type column is the one that remains -- the Truck and Motorcycle
# ratios in this notebook's output tables match the OtherVehTypeVIN rows.
# Confirm that keeping the VIN-derived type is what is intended.
ped_df = ped_df.drop('f_OtherVehType',axis=1)
# recompute the severity ratios and the N=100 bootstrap samples on the reduced
# table; both results are passed through renameLabelSR
factorSev = an.renameLabelSR(an.sratio(ped_df))
samples = an.renameLabelSR(an.bootstrapSR(ped_df,N=100))
# minSR=1.5 -- presumably limits the plot to ratios above 1.5; verify in analysis.plotSR
an.plotSR(factorSev,samples,minSR=1.5)
# Same plot again, with yellow ('y') bars for the K and A injuries from KABCO
# and light blue for everything else. The colour list is positional:
# 4 blue, 1 yellow, 4 blue, 6 yellow.
an.plotSR(
    factorSev,
    samples,
    minSR=1.5,
    colors=(['lightblue'] * 4 + ['y']
            + ['lightblue'] * 4 + ['y'] * 6),
)
# Table form of the plot above: for every row with SeverityRatio > 1.5, the
# 95th ('max') and 5th ('min') percentiles taken across the columns of
# `samples`, plus the point estimate ('mid'); displayed largest 'mid' first.
errmax = samples.quantile(q=.95, axis=1)
errmin = samples.quantile(q=.05, axis=1)
output = pd.DataFrame()
output['max'] = errmax.loc[factorSev.SeverityRatio > 1.5]
output['min'] = errmin.loc[factorSev.SeverityRatio > 1.5]
output['mid'] = factorSev.loc[factorSev.SeverityRatio > 1.5, 'SeverityRatio']
output.sort_values(by='mid', ascending=False)
max | min | mid | |
---|---|---|---|
Injury Status : Not Conscious States | 4.901997 | 4.295233 | 4.601941 |
Injury Type : Severe Bleeding | 4.868213 | 4.222310 | 4.538589 |
Injury Type : Internal | 3.862953 | 2.985556 | 3.452287 |
Injury Type : Amputation | 5.031100 | 1.880770 | 3.439500 |
Injury Type : Concussion | 3.269523 | 2.443518 | 2.824837 |
Injury Location : Eye | 4.340343 | 1.201093 | 2.691783 |
Age70 : age >= 70 | 2.809461 | 2.400650 | 2.596486 |
Vehicle Type : Truck | 3.151897 | 2.029730 | 2.488149 |
Injury Location : Head | 2.485255 | 2.251381 | 2.364495 |
Vehicle Type : Motorcycle | 3.018137 | 1.583859 | 2.231027 |
Injury Type : Fracture-Dislocation | 2.444508 | 1.996486 | 2.207623 |
Time of Day : 3am-6am | 2.322692 | 1.735324 | 1.985122 |
Pedestrian Action : Crossing, Against Signal | 1.666802 | 1.452234 | 1.579690 |
Injury Location : Entire Body | 1.663116 | 1.431508 | 1.548252 |
Time of Day : midnight-3am | 1.770839 | 1.325084 | 1.545434 |
Let's extract only the pedestrian age and see the distribution.
# Severity ratio by pedestrian/bicyclist age decade.
#
# The ratios are recomputed from the full `ped` table rather than read from
# `factorSev`: at this point in the notebook `factorSev` was last built from
# `ped_df`, whose '*Decade*' columns were dropped (and whose labels were
# renamed), so filtering it on '^AgeDecade' would select nothing. `ped` is
# also the table the bootstrap samples below are drawn from, which keeps the
# point estimates and the samples consistent.
ageSev = an.sratio(data=ped).T.filter(regex='^AgeDecade').T
samples = an.bootstrapSR(ped,N=100)
# the '^' anchor keeps the DriverAgeDecade rows out of the selection
ageSevSamples = samples.T.filter(regex='^AgeDecade').T
# minSR=0 -- presumably so no age decade is filtered out of the plot; verify in analysis.plotSR
an.plotSR(ageSev,ageSevSamples,minSR=0)
Exclude elderly to see if there are any differences
# Restrict the reduced table to rows with f_AgeYear below 70, then repeat the
# severity-ratio / bootstrap (N=100) / plot sequence on that subset.
pedNotOld = ped_df.loc[ped_df.f_AgeYear < 70]
factorSevNotOld = an.sratio(pedNotOld)
samples = an.bootstrapSR(pedNotOld, N=100)
an.plotSR(factorSevNotOld, samples, minSR=1.5)
# Shape (rows, columns) of the under-70 subset limited to severe outcomes;
# the row count is the number of severe cases.
pedNotOld.loc[pedNotOld.biss_severity_9 == 'severe'].shape
(1400, 103)
# Share of the under-70 subset that had a severe outcome (as a fraction, not a percent).
len(pedNotOld[pedNotOld.biss_severity_9 == 'severe']) / float(len(pedNotOld))
0.08536585365853659