import pandas as pd import numpy as np pd.set_option('display.max_rows', 500) def read_acs_file(path, coldict): return pd.read_csv(path, skiprows=1, na_values=["N", "(X)"], low_memory=False)\ .rename(columns=coldict)\ .set_index([ "Geography", "Id" ]) dp03_cols = { "Estimate; EMPLOYMENT STATUS - In labor force - Civilian labor force": "n_labor_force", "Estimate; EMPLOYMENT STATUS - In labor force - Civilian labor force - Employed": "n_employed" } dp04_cols = { "Estimate; HOUSING OCCUPANCY - Total housing units": "n_housing_units", "Estimate; HOUSING TENURE - Occupied housing units": "n_occupied_units", "Estimate; HOUSING TENURE - Owner-occupied": "n_owner_occupied_units", "Estimate; HOUSING TENURE - Renter-occupied": "n_renter_occupied_units", } dp05_cols = { "Estimate; SEX AND AGE - Total population": "total_pop", "Estimate; RACE - One race - White": "white_pop", "Estimate; RACE - One race - Black or African American": "black_pop" } s1701_cols = { "Total; Estimate; Population for whom poverty status is determined": "n_poverty_universe", "Below poverty level; Estimate; Population for whom poverty status is determined": "n_below_poverty_level" } dp03 = read_acs_file("../data/all-tracts/ACS_12_5YR_DP03_with_ann.csv", dp03_cols) dp04 = read_acs_file("../data/all-tracts/ACS_12_5YR_DP04_with_ann.csv", dp04_cols) dp05 = read_acs_file("../data/all-tracts/ACS_12_5YR_DP05_with_ann.csv", dp05_cols) s1701 = read_acs_file("../data/all-tracts/ACS_12_5YR_S1701_with_ann.csv", s1701_cols) joined = dp05[dp05_cols.values()]\ .join(dp04[dp04_cols.values()])\ .join(dp03[dp03_cols.values()])\ .join(s1701[s1701_cols.values()])\ joined["county"] = [ ", ".join(x.split(", ")[1:]) for x in joined.reset_index()["Geography"] ] stl_county_ids = joined[joined["county"] == "St. Louis County, Missouri"].index.values stl = joined.ix[stl_county_ids] def get_bwdi(tracts): b_total = tracts["black_pop"].sum() w_total = tracts["white_pop"].sum() x = (tracts["black_pop"] / b_total).replace([np.inf, -np.inf], np.nan).fillna(0) y = (tracts["white_pop"] / w_total).replace([np.inf, -np.inf], np.nan).fillna(0) abs_dist = (x - y).apply(abs) dist = abs_dist.sum() / 2 return dist def get_grouped_bwdi(tracts, grouper): grouped = tracts.groupby(grouper) pop = grouped["total_pop"].sum() black_pop = grouped["black_pop"].sum() df = pd.DataFrame({ "bwdi": grouped.apply(get_bwdi), "pop": pop, "black_pop": black_pop, "p_black_pop": black_pop * 1.0 / pop }) return df dissim = get_grouped_bwdi(joined, joined["county"]) large_black_populations = dissim[(dissim["pop"] >= 20000) & (dissim["p_black_pop"] >= 0.2)] large_black_populations["rank"] = large_black_populations["bwdi"].rank(ascending=False) large_black_populations.sort("rank")[["rank", "bwdi", "pop", "p_black_pop"]] def get_p_tracts_within_x_of_average(df, band=0.1): n = len(df) avg = df["black_pop"].sum() * 1. / df["total_pop"].sum() within_range = df[(avg - (df["black_pop"] * 1. / df["total_pop"])).apply(abs) <= band] return len(within_range) * 1.0 / n round(stl["black_pop"].sum() * 1. / stl["total_pop"].sum(), 3) round(get_p_tracts_within_x_of_average(stl, 0.1), 3) def get_p_black_pop_in_segregated(tracts, threshold=0.8): meets_threshold = tracts[(tracts["black_pop"] * 1.0 / tracts["total_pop"]) >= threshold] return meets_threshold["black_pop"].sum() * 1.0 / tracts["black_pop"].sum() round(get_p_black_pop_in_segregated(stl), 3) round(get_p_black_pop_in_segregated(joined), 3) def get_demography(df, grouper): grouped = df.groupby(grouper) return pd.DataFrame({ "n_occupied_units": grouped["n_occupied_units"].sum(), "p_owner_occupied": grouped["n_owner_occupied_units"].sum() * 1.0 / \ grouped["n_occupied_units"].sum(), "p_renter_occupied": df["n_renter_occupied_units"].sum() * 1.0 / \ grouped["n_occupied_units"].sum(), "p_unemployed": 1 - (grouped["n_employed"].sum() * 1.0 / \ grouped["n_labor_force"].sum()), "p_poverty": grouped["n_below_poverty_level"].sum() * 1.0 / \ grouped["n_poverty_universe"].sum() }) demography = get_demography(stl, (stl["black_pop"] * 1.0 / stl["total_pop"]) >= 0.8) demography.applymap(lambda x: round(x, 3)) import matplotlib as mpl import mplstyle import mplstyle.styles.simple %matplotlib inline mplstyle.set(mplstyle.styles.simple) mplstyle.set({ "figure.figsize": (8, 6) }) x_tick_labels = [ "Highly Segregated Tracts", "Other Tracts" ] def style_axis(ax, labels): ax.xaxis.grid(False) ax.set_xticks(range(len(labels))) ax.set_xticklabels(labels, rotation=0, fontsize="xx-large")#, fontweight="bold") ax.set_yticklabels([ "{0:.0f}%".format(y * 100) for y in ax.get_yticks() ]) mpl.pyplot.tight_layout() def make_chart(variable, title): mpl.pyplot.bar(left=[0,1], height=(variable[True], variable[False]), align="center", width=0.6, color="red", alpha=0.8) ax = mpl.pyplot.axes() style_axis(ax, x_tick_labels) ax.set_title(title, fontsize="xx-large") return ax ax = make_chart(demography["p_poverty"], "Population Below Poverty Level (St. Louis County, 2012)\n") ax = make_chart(demography["p_unemployed"], "Percent Unemployment (St. Louis County, 2012)\n") ax = make_chart(demography["p_owner_occupied"], "Households Owning Home (St. Louis County, 2012))\n") mpl.pyplot.bar(left=[0,1], height=(get_p_black_pop_in_segregated(joined), get_p_black_pop_in_segregated(stl)), align="center", width=0.6, color="red", alpha=0.8) ax = mpl.pyplot.axes() style_axis(ax, [ "Nationwide", "St. Louis County" ]) ax.set_title("Percentage of Black Residents Who Live In \nCensus Tracts That Are At Least 80% Black\n", fontsize="xx-large") pass