# Data Science Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
%matplotlib inline
# Project Helper Files
from constants import *
America's 96,000 public schools provide the foundation for the next generation's education and, ultimately, future success.
Yet, educational researchers and practitioners worry that America's public schools do not provide all students equal opportunity for future success.
Some stakeholders feel the inequitable treatment of students is so severe -- particularly poor and minority students -- they've coined the term School to Prison Pipeline. That is, some schools and students are pushed, not to success, but towards the criminal justice system.
The three questions in this analysis explore the prominent and debated concept of "The School to Prison Pipeline" in US schools, generally defined as the disproportionate tendency of minors and young adults from disadvantaged backgrounds to become incarcerated because of increasingly harsh school and municipal policies.
Discussions of "The School to Prison Pipeline" and its causes largely center on a few interrelated factors:
We'll explore these key factors via disaggregated data on all 96,000 US public schools from the 2015–16 school year CRDC data. Because causality among these factors cannot be determined from this data alone, this investigation centers on the extent and nature of these key measures at a national level.
The Civil Rights Data Collection (CRDC) is a biennial survey required by the U.S. Department of Education’s (Department) Office for Civil Rights (OCR) since 1968. (Note, however, that survey content changes over time.)
The 2015–16 CRDC (the most recent year published) collects data from all public local educational agencies (LEAs, i.e., school districts) and schools, including
with a response rate of 99.8% from 17,337 LEAs and 96,360 schools. Specifically, I will be looking at the finer-grained data disaggregated by school.
Each school (row) in the dataset includes 1,800 columns (typically a student count disaggregated by race and gender for some school measure) regarding 32 general topics, comprising a 460 MB csv. The topics I will investigate utilize only 50 columns pertaining to suspensions, expulsions, and school population. I will only look at white, black, and hispanic students, who form the majority of students at nearly all schools.
# Path of the school-level CRDC CSV (the file that also carries the
# LAT1516 / LON1516 coordinate columns).
DATA_FILE = 'data (download CSVs here)/crdc-data-with-lat-long.csv'

# Read only the columns named in COLS_WITH_NEEDED_DATA (from constants).
# low_memory=False has pandas process the file in one piece instead of in
# chunks; the file is decoded as ISO-8859-1.
crdc_data = pd.read_csv(DATA_FILE, usecols=COLS_WITH_NEEDED_DATA, encoding="ISO-8859-1", low_memory=False)
LEA_STATE_NAME | SCH_NAME | SCH_ENR_HI_M | SCH_ENR_HI_F | SCH_ENR_BL_M | SCH_ENR_BL_F | SCH_ENR_WH_M | SCH_ENR_WH_F | TOT_ENR_M | TOT_ENR_F | ... | TOT_DISCWODIS_EXPZT_M | TOT_DISCWODIS_EXPZT_F | SCH_FTESECURITY_LEO | SCH_FTESECURITY_GUA | SCH_FTESERVICES_NUR | SCH_FTESERVICES_PSY | SCH_FTESERVICES_SOC | SCH_JJTYPE | LAT1516 | LON1516 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ALABAMA | Wallace Sch - Mt Meigs Campus | 5 | 0 | 71 | 0 | 50 | 0 | 128 | 0 | ... | 0 | 0 | -9.00 | 2.0 | 0.00 | 2.00 | 0.0 | -7 | 32.374812 | -86.082360 |
1 | ALABAMA | McNeel Sch - Vacca Campus | 0 | 0 | 38 | 0 | 14 | 0 | 52 | 0 | ... | 0 | 0 | -9.00 | 2.0 | 0.00 | 1.00 | 0.0 | -7 | 33.583385 | -86.710058 |
2 | ALABAMA | Alabama Youth Services | 0 | 0 | 554 | 0 | 323 | 0 | 908 | 0 | ... | 0 | 0 | -9.00 | 2.0 | 0.00 | 0.00 | 0.0 | -9 | 32.374847 | -86.082332 |
3 | ALABAMA | AUTAUGA CAMPUS | 2 | 0 | 17 | 0 | 14 | 0 | 38 | 0 | ... | 0 | 0 | -9.00 | 0.0 | 0.00 | 0.00 | 0.0 | -7 | NaN | NaN |
4 | ALABAMA | Albertville Middle School | 140 | 143 | 11 | 5 | 194 | 185 | 358 | 346 | ... | 0 | 0 | 1.00 | 0.0 | 1.00 | 0.00 | 0.0 | -9 | 34.260194 | -86.206174 |
5 | ALABAMA | Albertville High Sch | 260 | 221 | 20 | 20 | 350 | 398 | 645 | 650 | ... | 0 | 0 | 1.00 | 1.0 | 1.00 | 0.00 | 0.0 | -9 | 34.262154 | -86.204863 |
6 | ALABAMA | Evans Elem Sch | 161 | 173 | 17 | 14 | 194 | 191 | 381 | 389 | ... | 0 | 0 | 1.00 | 0.0 | 1.00 | 0.00 | 0.0 | -9 | 34.273161 | -86.220086 |
7 | ALABAMA | Albertville Elem Sch | 218 | 215 | 11 | 8 | 188 | 176 | 430 | 417 | ... | 0 | 0 | 1.00 | 0.0 | 1.00 | 0.00 | 0.0 | -9 | 34.253251 | -86.221834 |
8 | ALABAMA | Big Spring Lake Kinderg Sch | 134 | 128 | 11 | 5 | 110 | 92 | 264 | 234 | ... | 0 | 0 | 1.00 | 0.0 | 1.00 | 0.00 | 0.0 | -9 | 34.290220 | -86.192490 |
9 | ALABAMA | Albertville Primary Sch | 281 | 269 | 20 | 17 | 227 | 230 | 555 | 534 | ... | 0 | 0 | 1.00 | 0.0 | 1.00 | 0.00 | 0.0 | -9 | 34.253251 | -86.221834 |
10 | ALABAMA | Kate Duncan Smith DAR Middle | 8 | 5 | 2 | 2 | 218 | 188 | 235 | 210 | ... | 0 | 0 | 0.33 | 0.0 | 0.33 | 0.00 | 0.0 | -9 | 34.533721 | -86.253681 |
11 | ALABAMA | Asbury Sch | 92 | 95 | 0 | 2 | 191 | 149 | 289 | 250 | ... | 0 | 0 | 0.50 | 0.0 | 0.50 | 0.00 | 0.0 | -9 | 34.362770 | -86.142240 |
12 | ALABAMA | Claysville Jr High Sch | 8 | 8 | 5 | 0 | 44 | 53 | 64 | 65 | ... | 0 | 0 | -9.00 | 0.0 | 0.25 | 0.00 | 0.0 | -9 | 34.406429 | -86.270689 |
13 | ALABAMA | Douglas Elem Sch | 95 | 83 | 2 | 2 | 164 | 155 | 261 | 240 | ... | 0 | 0 | 0.25 | 0.0 | 0.25 | 0.00 | 0.0 | -9 | 34.176234 | -86.321259 |
14 | ALABAMA | Douglas High Sch | 77 | 65 | 5 | 5 | 224 | 212 | 310 | 284 | ... | 0 | 0 | 0.25 | 0.0 | 0.25 | 0.00 | 0.0 | -9 | 34.178157 | -86.319947 |
15 | ALABAMA | Brindlee Mountain Elementary School | 11 | 8 | 2 | 2 | 116 | 113 | 131 | 123 | ... | 0 | 0 | 0.25 | 0.0 | 0.25 | 0.00 | 0.0 | -9 | 34.344388 | -86.442199 |
16 | ALABAMA | Kate D Smith DAR High Sch | 2 | 2 | 2 | 2 | 230 | 215 | 236 | 223 | ... | 0 | 0 | 0.34 | 0.0 | 0.33 | 0.00 | 0.0 | -9 | 34.533721 | -86.253681 |
17 | ALABAMA | Brindlee Mountain Primary School | 5 | 5 | 2 | 2 | 119 | 95 | 128 | 102 | ... | 0 | 0 | 0.33 | 0.0 | 0.25 | 0.00 | 0.0 | -9 | 34.399966 | -86.446812 |
18 | ALABAMA | Robert D Sloman Primary | 104 | 89 | 2 | 5 | 146 | 140 | 258 | 238 | ... | 0 | 0 | 0.25 | 0.0 | 25.25 | 0.00 | 0.0 | -9 | 34.176713 | -86.323279 |
19 | ALABAMA | Brindlee Mt Middle Sch | 11 | 5 | 2 | 2 | 113 | 122 | 130 | 129 | ... | 0 | 0 | 0.25 | 0.0 | 0.33 | 0.00 | 0.0 | -9 | 34.377158 | -86.422337 |
20 | ALABAMA | Brindlee Mt High Sch | 11 | 8 | 5 | 2 | 167 | 164 | 187 | 176 | ... | 0 | 0 | 0.34 | 0.0 | 0.34 | 0.00 | 0.0 | -9 | 34.376400 | -86.421876 |
21 | ALABAMA | Kate D Smith DAR Elem Sch | 2 | 2 | 0 | 0 | 200 | 212 | 215 | 223 | ... | 0 | 0 | 0.33 | 0.0 | 0.33 | 0.00 | 0.0 | -9 | 34.533721 | -86.253681 |
22 | ALABAMA | Douglas Middle Sch | 89 | 71 | 2 | 0 | 155 | 143 | 250 | 218 | ... | 0 | 0 | 0.25 | 0.0 | 0.25 | 0.00 | 0.0 | -9 | 34.176234 | -86.321259 |
23 | ALABAMA | Asbury Elem Sch | 98 | 101 | 2 | 2 | 137 | 152 | 237 | 259 | ... | 0 | 0 | 0.50 | 0.0 | 0.50 | 0.00 | 0.0 | -9 | 34.362794 | -86.142507 |
24 | ALABAMA | Trace Crossings Elem Sch | 116 | 56 | 74 | 80 | 101 | 74 | 334 | 238 | ... | 0 | 0 | 0.00 | 0.0 | 2.00 | 0.00 | 0.0 | -9 | 33.340886 | -86.844733 |
25 | ALABAMA | Greystone Elem Sch | 20 | 20 | 23 | 20 | 227 | 173 | 307 | 256 | ... | 0 | 0 | 0.00 | 0.0 | 1.00 | 0.00 | 0.0 | -9 | 33.413047 | -86.658547 |
26 | ALABAMA | Hoover High Sch | 113 | 116 | 428 | 398 | 860 | 797 | 1518 | 1449 | ... | 0 | 0 | 0.00 | 0.0 | 3.00 | 0.00 | 1.0 | -9 | 33.344370 | -86.837683 |
27 | ALABAMA | Berry Middle Sch | 35 | 44 | 119 | 122 | 368 | 347 | 586 | 582 | ... | 0 | 0 | 0.00 | 0.0 | 3.00 | 0.00 | 0.0 | -9 | 33.395648 | -86.732180 |
28 | ALABAMA | South Shades Crest Elem Sch | 29 | 20 | 80 | 53 | 173 | 179 | 318 | 295 | ... | 0 | 0 | 0.00 | 0.0 | 1.00 | 0.00 | 0.0 | -9 | 33.337527 | -86.878390 |
29 | ALABAMA | Robert F Bumpus Middle Sch | 35 | 29 | 125 | 122 | 209 | 212 | 414 | 414 | ... | 0 | 0 | 0.00 | 0.0 | 1.00 | 0.00 | 0.0 | -9 | 33.330911 | -86.852477 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
96330 | WYOMING | Washington Elementary | 23 | 17 | 0 | 0 | 92 | 83 | 120 | 102 | ... | 0 | 0 | 0.20 | 0.0 | 0.33 | 1.00 | 0.0 | -9 | 41.510680 | -109.465821 |
96331 | WYOMING | Lincoln Middle School | 38 | 41 | 2 | 2 | 149 | 176 | 203 | 230 | ... | 0 | 0 | 0.33 | 0.0 | 0.50 | 0.00 | 1.0 | -9 | 41.510680 | -109.465821 |
96332 | WYOMING | Jackson Elementary | 23 | 29 | 0 | 2 | 86 | 104 | 116 | 141 | ... | 0 | 0 | 0.20 | 0.0 | 0.34 | 0.00 | 0.0 | -9 | 41.510680 | -109.465821 |
96333 | WYOMING | Truman Elementary | 20 | 20 | 0 | 2 | 131 | 116 | 155 | 142 | ... | 0 | 0 | 0.20 | 0.0 | 0.33 | 0.00 | 0.0 | -9 | 41.510680 | -109.465821 |
96334 | WYOMING | Harrison Elementary | 8 | 11 | 2 | 2 | 122 | 125 | 137 | 143 | ... | 0 | 0 | 0.20 | 0.0 | 1.00 | 0.50 | 1.0 | -9 | 41.510680 | -109.465821 |
96335 | WYOMING | Thoman Ranch Elementary | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2 | ... | 0 | 0 | -9.00 | 0.0 | 0.00 | 0.00 | 0.0 | -9 | 41.510680 | -109.465821 |
96336 | WYOMING | Ten Sleep K-12 | 0 | 0 | 2 | 0 | 56 | 50 | 60 | 50 | ... | 0 | 0 | 0.00 | 0.0 | 1.00 | 0.00 | 0.0 | -9 | 44.036012 | -107.447922 |
96337 | WYOMING | Colter Elementary | 113 | 119 | 0 | 0 | 167 | 149 | 290 | 270 | ... | 0 | 0 | -9.00 | 0.0 | 1.45 | 0.67 | 0.0 | -9 | 43.462312 | -110.797767 |
96338 | WYOMING | Jackson Elementary | 119 | 128 | 2 | 0 | 170 | 158 | 301 | 290 | ... | 0 | 0 | -9.00 | 0.0 | 1.00 | 1.00 | 1.0 | -9 | 43.462312 | -110.797767 |
96339 | WYOMING | Jackson Hole High School | 95 | 89 | 2 | 0 | 212 | 215 | 324 | 316 | ... | 0 | 0 | 0.75 | 0.0 | 0.95 | 1.00 | 0.0 | -9 | 43.462312 | -110.797767 |
96340 | WYOMING | Jackson Hole Middle School | 92 | 92 | 2 | 2 | 194 | 182 | 298 | 288 | ... | 0 | 0 | 1.00 | 0.0 | 0.45 | 1.00 | 0.0 | -9 | 43.462312 | -110.797767 |
96341 | WYOMING | Alta Elementary | 0 | 0 | 0 | 0 | 26 | 17 | 26 | 19 | ... | 0 | 0 | -9.00 | 0.0 | 0.00 | 0.00 | 0.0 | -9 | 43.462312 | -110.797767 |
96342 | WYOMING | Kelly Elementary | 0 | 0 | 0 | 0 | 26 | 20 | 26 | 20 | ... | 0 | 0 | -9.00 | 0.0 | 0.05 | 0.00 | 0.0 | -9 | 43.462312 | -110.797767 |
96343 | WYOMING | Moran Elementary | 2 | 2 | 0 | 0 | 8 | 8 | 10 | 10 | ... | 0 | 0 | -9.00 | 0.0 | 0.05 | 0.00 | 0.0 | -9 | 43.462312 | -110.797767 |
96344 | WYOMING | Wilson Elementary | 2 | 8 | 0 | 0 | 101 | 107 | 110 | 125 | ... | 0 | 0 | -9.00 | 0.0 | 0.00 | 0.00 | 0.0 | -9 | 43.462312 | -110.797767 |
96345 | WYOMING | Summit High School | 20 | 8 | 0 | 0 | 14 | 8 | 36 | 18 | ... | 0 | 0 | 0.25 | 0.0 | 0.05 | 0.00 | 1.0 | -9 | 43.462312 | -110.797767 |
96346 | WYOMING | Upton Middle School | 0 | 0 | 0 | 0 | 32 | 20 | 34 | 24 | ... | 0 | 0 | -9.00 | 0.0 | 0.20 | 0.00 | 0.0 | -9 | 44.101000 | -104.623594 |
96347 | WYOMING | Upton Elementary | 0 | 0 | 0 | 2 | 59 | 56 | 63 | 66 | ... | 0 | 0 | -9.00 | 0.0 | 0.45 | 0.00 | 0.0 | -9 | 44.101000 | -104.623594 |
96348 | WYOMING | Upton High School | 0 | 0 | 0 | 0 | 47 | 47 | 49 | 51 | ... | 0 | 0 | -9.00 | 0.0 | 0.35 | 0.00 | 0.0 | -9 | 44.101000 | -104.623594 |
96349 | WYOMING | Worland High School | 53 | 38 | 0 | 0 | 146 | 155 | 203 | 197 | ... | 0 | 0 | 0.20 | 0.0 | 0.20 | 0.00 | 0.2 | -9 | 44.011520 | -107.943721 |
96350 | WYOMING | Worland Middle School | 41 | 44 | 0 | 0 | 125 | 113 | 171 | 159 | ... | 0 | 0 | 0.20 | 0.0 | 0.20 | 0.00 | 0.2 | -9 | 44.011520 | -107.943721 |
96351 | WYOMING | East Side Elementary | 17 | 23 | 0 | 0 | 74 | 92 | 93 | 120 | ... | 0 | 0 | 0.20 | 0.0 | 0.20 | 0.00 | 0.2 | -9 | 44.011520 | -107.943721 |
96352 | WYOMING | South Side Elementary | 26 | 20 | 0 | 0 | 71 | 86 | 103 | 111 | ... | 0 | 0 | 0.20 | 0.0 | 0.20 | 0.00 | 0.2 | -9 | 44.011520 | -107.943721 |
96353 | WYOMING | West Side Elementary | 35 | 41 | 0 | 0 | 65 | 53 | 104 | 98 | ... | 0 | 0 | 0.20 | 0.0 | 0.20 | 0.00 | 0.2 | -9 | 44.011520 | -107.943721 |
96354 | WYOMING | Powder River Basin Children's Center | 0 | 2 | 0 | 0 | 26 | 11 | 28 | 13 | ... | 0 | 0 | -9.00 | 0.0 | 1.00 | 0.50 | 1.0 | -9 | 44.297605 | -105.494905 |
96355 | WYOMING | C-Bar-V Ranch | 5 | 2 | 0 | 0 | 26 | 5 | 41 | 9 | ... | 0 | 0 | -9.00 | 0.0 | 1.00 | 2.50 | 3.5 | -9 | 43.535575 | -110.830607 |
96356 | WYOMING | Wyoming Girls School | 0 | 8 | 0 | 2 | 0 | 53 | 0 | 82 | ... | 0 | 0 | -9.00 | -9.0 | 2.00 | 0.00 | 1.0 | Post | 41.138600 | -104.819200 |
96357 | WYOMING | Wyoming Boys School | 23 | 0 | 5 | 0 | 146 | 0 | 187 | 0 | ... | 0 | 0 | -9.00 | -9.0 | 0.00 | 0.00 | 0.0 | Post | 41.138600 | -104.819200 |
96358 | WYOMING | Youth Emergency Services Inc. | 2 | 2 | 0 | 0 | 17 | 14 | 21 | 18 | ... | 0 | 0 | -9.00 | 0.0 | 0.00 | 0.00 | 1.0 | -9 | 44.296500 | -105.494900 |
96359 | WYOMING | Saint Stephen's Indian School | 0 | 0 | 0 | 0 | 0 | 0 | 110 | 107 | ... | 0 | 0 | -9.00 | 1.0 | 1.00 | 0.00 | 0.0 | -9 | 42.985268 | -108.420787 |
96360 rows × 50 columns
To get a sense of the geographic distribution of the 96,000 schools, we can plot their lat-long coordinates. Schools are colored simply by their latitude.
Note Alaska and Hawaii faintly on the left, with lower population densities.
# One very small dot (s=0.001) per school at (longitude, latitude); the dot
# color is taken from the latitude.
plt.scatter(
    x=crdc_data['LON1516'],
    y=crdc_data['LAT1516'],
    c=crdc_data['LAT1516'],
    s=0.001,
    cmap='ocean',
)
plt.show()
# Module-level alias for the full CRDC frame; remove_schools_with_pop_less_than
# reads this name directly.
df = crdc_data
# Race codes that appear in the column names, e.g. SCH_ENR_BL_M
# (BL = black, WH = white, HI = hispanic).
RACES = ['BL', 'WH', 'HI']
# Sex codes that appear in the column names (M = male, F = female).
SEXES = ['M', 'F']
POP_LOWER_BOUND = 50 # Remove populations (e.g. white male) smaller than this threshold
# 1. Plotting
def plot_measure_accross_all_demographics(df, calculation, measure, bounds=(0, 1)):
    """Draw one map of schools per (sex, race) pair, colored by a computed column.

    For every sex in SEXES and every race in RACES the column named
    ``f'{calculation}_{measure}_{race}_{sex}'`` is read from ``df``. Schools
    where that column is null are left out of the subplot; the remaining
    schools are drawn at (LON1516, LAT1516) and colored by the column value.
    Each subplot title shows the mean of the column and the number of schools.

    Args:
        df: DataFrame holding 'LON1516', 'LAT1516' and the computed columns.
        calculation: Column-name prefix, e.g. 'PERCENT_AFFECTED'.
        measure: Measure part of the column name, e.g. 'SCH_DISCWODIS_MULTOOS'.
        bounds: (low, high) limits applied to the color scale of every subplot.
    """
    plt.figure(figsize=(20, 6))
    demographics = [(sex, race) for sex in SEXES for race in RACES]
    for figure_num, (sex, race) in enumerate(demographics, start=1):
        likelyhood = f'{calculation}_{measure}_{race}_{sex}'
        # Build the mask from the frame that was passed in (not from a
        # module-level variable) so it always matches the rows being plotted.
        curr_dem_data = df[df[likelyhood].notna()]
        values = curr_dem_data[likelyhood]

        plt.subplot(len(SEXES), len(RACES), figure_num)
        plt.scatter(
            x=curr_dem_data['LON1516'],
            y=curr_dem_data['LAT1516'],
            c=values,
            s=1,
            alpha=1,
            cmap='coolwarm',
        )
        plt.title(f'{race}_{sex}, avg: {round(values.mean(), 2)}, n: {values.count()}')
        plt.colorbar()
        plt.clim(*bounds)
        plt.axis('off')
    plt.subplots_adjust(wspace=0.8, hspace=0.6)
    plt.show()
# 2. Calculations
def calculate_likelyhood_comparisons(df, measure, comparison_race=None, races=RACES, sexes=SEXES, lower_bound=POP_LOWER_BOUND):
    """Append one likelihood column per (sex, race) demographic.

    The schools in ``df`` are first reduced to those whose demographic count
    columns are all at least ``lower_bound``. Then, for every sex in ``sexes``
    and race in ``races``, calculate_likelyhood_comparison adds one column.

    Args:
        df: DataFrame of schools to analyse.
        measure: Column-name prefix of the measure, e.g. 'SCH_DISCWODIS_MULTOOS'.
        comparison_race: Optional race code. When given, each column holds how
            many times as likely the demographic is to be affected as the
            same-sex population of ``comparison_race``; when omitted, each
            column holds the plain share of the demographic that is affected.
        races: Race codes to iterate over.
        sexes: Sex codes to iterate over.
        lower_bound: Minimum size every demographic count must reach for a
            school to be kept.

    Returns:
        The filtered DataFrame with the new columns added.
    """
    # Filter the frame that was passed in rather than the module-level one.
    df = remove_schools_with_pop_less_than(lower_bound, schools=df)
    for sex in sexes:
        for race in races:
            # A demographic is always compared with the same sex of the
            # comparison race.
            df = calculate_likelyhood_comparison(df, measure, race, sex, comparison_race, sex)
    return df


def remove_schools_with_pop_less_than(lower_bound, schools=None):
    """Keep only schools where every DEMOGRAPHIC_COUNT_COLS value is >= lower_bound.

    Args:
        lower_bound: Minimum value required in each demographic count column.
        schools: DataFrame to filter. Defaults to the module-level ``df`` so
            existing one-argument calls keep working.

    Returns:
        A new DataFrame with the qualifying rows, in their original order and
        with a fresh 0..n-1 index.
    """
    if schools is None:
        schools = df
    counts = schools[DEMOGRAPHIC_COUNT_COLS]
    # Select rows with a boolean mask. Joining the filtered counts back by
    # value would duplicate schools that share identical demographic counts.
    # Missing counts compare as False and are therefore dropped as well.
    large_enough = (counts >= lower_bound).all(axis='columns')
    return schools[large_enough].reset_index(drop=True)
def calculate_likelyhood_comparison(df, measure, race, sex, comparison_race, comparison_sex):
    """Add one likelihood column for a single (race, sex) demographic.

    Without ``comparison_race`` the new column is
    ``PERCENT_AFFECTED_{measure}_{race}_{sex}`` and holds the share of the
    demographic that is affected. With ``comparison_race`` the share is
    divided by the share of the comparison demographic and the column is
    named ``LH_COMPARED_TO_{comparison_race}_FOR_{measure}_{race}_{sex}``.

    Args:
        df: DataFrame with the count and enrollment columns for the demographic.
        measure: Column-name prefix of the measure.
        race: Race code of the demographic.
        sex: Sex code of the demographic.
        comparison_race: Race code of the baseline demographic, or a falsy
            value for no comparison.
        comparison_sex: Sex code of the baseline demographic.

    Returns:
        ``df`` left-joined (on the index) with the new column. Schools whose
        value was filtered out hold NaN in that column.
    """
    likelyhood = get_percentage_affected(df, measure, race, sex)
    column_name = f'PERCENT_AFFECTED_{measure}_{race}_{sex}'
    if comparison_race:
        baseline = get_percentage_affected(df, measure, comparison_race, comparison_sex)
        likelyhood = likelyhood / baseline
        # Name the column after the race actually used as the baseline; for
        # comparison_race='WH' this is the same name as before.
        column_name = f'LH_COMPARED_TO_{comparison_race}_FOR_{measure}_{race}_{sex}'

    # Keep only finite, strictly positive values: infinities (zero baseline),
    # NaN (0/0 or missing data) and zeros (nobody affected) are dropped.
    usable = (likelyhood != np.inf) & pd.notnull(likelyhood) & (likelyhood > 0)
    return df.merge(
        likelyhood[usable].to_frame(column_name),
        how='left',
        left_index=True,
        right_index=True,
    )
def get_percentage_affected(df, measure, race, sex):
    """Return, per school, the affected count divided by the enrollment count.

    The affected column is ``{measure}_{race}_{sex}`` (e.g.
    'SCH_DISCWODIS_MULTOOS_BL_M') and the enrollment column is
    ``SCH_ENR_{race}_{sex}``. The result is a fraction, not a value scaled to 100.
    """
    affected_counts = df[f'{measure}_{race}_{sex}']
    enrollment = df[f'SCH_ENR_{race}_{sex}']
    return affected_counts.div(enrollment)
# Share of each demographic with "more than one out of school suspension".
measure = 'SCH_DISCWODIS_MULTOOS'
calculation = 'PERCENT_AFFECTED'
data = calculate_likelyhood_comparisons(df, measure)
plot_measure_accross_all_demographics(data, calculation, measure, bounds=[0, 0.15])
For the above "percent of population affected" plots, it's nearly impossible to compare severity across demographics within a single school. One way to zero in on this is to color schools by how much more likely a certain population is to be affected compared to the least affected population. This measure might reveal schools where, even if a demographic is severely affected, so were other demographics.
How likelihood comparisons are calculated:
Percent of X pop affected / Percent of White counterpart population affected
# "More than one out of school suspension": each demographic's share divided
# by the share of its white, same-sex counterpart.
measure = 'SCH_DISCWODIS_MULTOOS'
calculation = 'LH_COMPARED_TO_WH_FOR'
data = calculate_likelyhood_comparisons(df, measure, comparison_race='WH')
plot_measure_accross_all_demographics(data, calculation, measure, bounds=[1, 4])
All of the following analyses look at "students without disabilities expelled under zero-tolerance policies".
# Share of each demographic expelled under zero tolerance. lower_bound=1
# keeps every school that has at least one student in each demographic.
calculation = 'PERCENT_AFFECTED'
measure = 'SCH_DISCWODIS_EXPZT'
data = calculate_likelyhood_comparisons(df, measure, lower_bound=1)
plot_measure_accross_all_demographics(data, calculation, measure, bounds=[0, 0.09])
It's possible to have fractional staff recorded if they are not full-time.
A system-level data-entry error on the form filled in by schools meant that only 22,000 schools (25,000 after corrections) correctly entered the number of Law Enforcement Officers on campus. Where this system error occurred, the value is -9.
Due to this fact, there are only 17,500 schools with both Police and Counselor counts. We'll investigate Juvenile Justice facilities separately: first, because we'd expect different counselor and police presence there, and second, because none of them actually recorded the number of police.
Notably, none of the 608 Juvenile Justice facilities have a count entered for police officers. This may be because the police staff at JJ facilities do not map to the categories on the survey.
We could try to use Security Guards as a proxy for Police. However, only 64 of the 608 JJ facilities have data for both Counselors and Security Guards, so we've removed all JJ schools from the following analysis.
# Names of the per-school staff count columns used in the staffing plots.
# Law enforcement officers.
POLICE = 'SCH_FTESECURITY_LEO'
# NOTE(review): the PSY suffix suggests this column counts psychologists
# rather than counselors — confirm against the CRDC data dictionary.
COUNSELORS = 'SCH_FTESERVICES_PSY'
# Security guards.
SECURITY_GUARDS = 'SCH_FTESECURITY_GUA'
# Name given to the derived suspensions-per-student column that
# plot_ratio_to_students_of adds to its result (not a CRDC column).
SUSPENSIONS = 'TOTAL_SUSPENSIONS'
def plot_ratio_to_students_of(
    data=crdc_data,
    x_name=POLICE,
    y_name=COUNSELORS,
    xlabel=None,
    ylabel=None,
    xlim=None,
    ylim=None,
    dot_size=0.5,
    show_hist=False
):
    """Scatter two per-student staff ratios against each other, colored by suspension rate.

    Args:
        data: DataFrame of schools with the staff columns, 'TOT_ENR_M' /
            'TOT_ENR_F' and 'TOT_DISCWODIS_MULTOOS_M' / 'TOT_DISCWODIS_MULTOOS_F'.
        x_name: Staff column plotted on the x axis.
        y_name: Staff column plotted on the y axis.
        xlabel: Label for the x axis (also used in the title).
        ylabel: Label for the y axis (also used in the title).
        xlim: Optional x-axis limits.
        ylim: Optional y-axis limits.
        dot_size: Marker size of each school.
        show_hist: When true, also draw a histogram of each axis via plot_hist.

    Returns:
        DataFrame with the columns ``y_name``, ``x_name`` (both divided by
        total enrollment) and SUSPENSIONS (suspensions per student), limited
        to the schools that were plotted.
    """
    staff_to_students_df = _staff_to_student_ratios(data, x_name, y_name)

    plt.scatter(
        x=staff_to_students_df[x_name],
        y=staff_to_students_df[y_name],
        alpha=0.9,
        s=dot_size,
        c=staff_to_students_df[SUSPENSIONS],
        cmap='coolwarm',
    )
    plt.clim(0, 0.05)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.title(f'{xlabel} vs {ylabel}, color=Long Term Suspension Percentage, n={len(staff_to_students_df)}')
    plt.colorbar()
    axes = plt.gca()
    axes.set_xlim(xlim)
    axes.set_ylim(ylim)
    plt.show()

    if show_hist:
        plot_hist(staff_to_students_df, x_name, xlabel)
        plot_hist(staff_to_students_df, y_name, ylabel)
    return staff_to_students_df


def _staff_to_student_ratios(data, x_name, y_name):
    """Return per-student ratios of two staff columns plus the suspension rate."""
    enrollment_cols = ['TOT_ENR_M', 'TOT_ENR_F']
    suspension_cols = ['TOT_DISCWODIS_MULTOOS_M', 'TOT_DISCWODIS_MULTOOS_F']

    # Negative numbers signal data errors, so a school is used only when every
    # count that enters a ratio is non-negative: the two staff columns, the
    # enrollment columns and the suspension columns.
    count_cols = [y_name, x_name, *enrollment_cols, *suspension_cols]
    documented = (data[count_cols] >= 0).all(axis='columns')
    # A school with no enrolled students would make every ratio inf or NaN.
    has_students = data[enrollment_cols].sum(axis='columns') > 0
    staff_df = data.loc[documented & has_students]

    total_pop = staff_df['TOT_ENR_M'] + staff_df['TOT_ENR_F']
    ratios = pd.DataFrame()
    ratios[y_name] = staff_df[y_name] / total_pop
    ratios[x_name] = staff_df[x_name] / total_pop
    # Suspensions per student, used for the dot color.
    ratios[SUSPENSIONS] = staff_df[suspension_cols].sum(axis='columns') / total_pop
    # Schools without any suspension are not plotted.
    return ratios[ratios[SUSPENSIONS] > 0]
def plot_hist(data, name, label, x_range=(0, 0.008), y_range=(None, None)):
    """Show a 100-bin histogram of one column of ``data``.

    Args:
        data: DataFrame containing the column ``name``.
        name: Column to histogram.
        label: Text used for the x-axis label and in the title.
        x_range: (low, high) range covered by the bins.
        y_range: Optional (bottom, top) limits for the y axis; a None entry
            leaves that limit automatic.
    """
    plt.hist(data[name], range=x_range, bins=100)
    # Only touch the y limits when the caller asked for them, so the default
    # call keeps matplotlib's automatic scaling.
    if any(limit is not None for limit in y_range):
        plt.ylim(*y_range)
    plt.xlabel(label)
    plt.ylabel('Number of Schools')
    plt.title(f'Distribution of {label}, n={len(data)}, mean={round(data[name].mean(), 3)}')
    plt.grid(True)
    plt.show()
Plotting campus police against campus counselors per student, and coloring schools by suspension rate, we see a hotspot of suspensions at schools with low counselor levels and high police levels.
You can also see how schools seem to hire with a predetermined ratio in mind: consistent ratio lines jut outward, most notably one marking a 1-to-1 ratio.
This supports existing intuition in the "School to Prison Pipeline" concept -- schools with high suspension rates are more likely to confront troubled kids with police than with trained counselors. However, it remains unclear whether high levels of suspension-worthy activity triggered the hiring of more police, or whether more police (and a lack of counseling) escalate suspension counts.
Which leads us to two questions we can investigate: do Police correlate positively alone with suspensions, or does it worsen when mixed with low counseling?
# Police vs. counselors per student, both axes limited to [0, 0.01], followed
# by a histogram of each axis.
staff_to_students_df = plot_ratio_to_students_of(
    x_name=POLICE, xlabel='Police/student', xlim=[0, 0.01],
    y_name=COUNSELORS, ylabel='Counselor/student', ylim=[0, 0.01],
    show_hist=True,
)