import pandas as pd
import numpy as np
# Raise pandas' `display.max_rows` option to 500 so longer tables are shown
# in full when a frame is displayed.
pd.set_option('display.max_rows', 500)

def read_acs_file(path, coldict):
    """Load one ACS CSV export and index it by geography.

    The first line of the file is skipped, so the second line supplies the
    column headers. Cells containing "N" or "(X)" are read as missing values.

    Args:
        path: Location of the CSV file.
        coldict: Mapping of original column header -> replacement name;
            headers not present in the mapping keep their original name.

    Returns:
        A DataFrame indexed by the ("Geography", "Id") column pair.
    """
    raw = pd.read_csv(
        path,
        skiprows=1,
        na_values=["N", "(X)"],
        low_memory=False,
    )
    renamed = raw.rename(columns=coldict)
    return renamed.set_index(["Geography", "Id"])

# Column maps: verbose ACS column header -> short analysis name.
# Each dict is passed to read_acs_file() as `coldict`, and its values are
# later used to pick out just these columns when the tables are joined.

# Labor-force and employment counts (used for the DP03 file).
dp03_cols = {
    "Estimate; EMPLOYMENT STATUS - In labor force - Civilian labor force": "n_labor_force",
    "Estimate; EMPLOYMENT STATUS - In labor force - Civilian labor force - Employed": "n_employed"
}

# Housing-unit counts: total, occupied, owner- and renter-occupied
# (used for the DP04 file).
dp04_cols = {
    "Estimate; HOUSING OCCUPANCY - Total housing units": "n_housing_units",
    "Estimate; HOUSING TENURE - Occupied housing units": "n_occupied_units",
    "Estimate; HOUSING TENURE - Owner-occupied": "n_owner_occupied_units",
    "Estimate; HOUSING TENURE - Renter-occupied": "n_renter_occupied_units",
}

# Total population plus the "One race" White and Black counts
# (used for the DP05 file).
dp05_cols = {
    "Estimate; SEX AND AGE - Total population": "total_pop",
    "Estimate; RACE - One race - White": "white_pop",
    "Estimate; RACE - One race - Black or African American": "black_pop"
}

# Poverty universe and the count below the poverty level
# (used for the S1701 file).
s1701_cols = {
    "Total; Estimate; Population for whom poverty status is determined": "n_poverty_universe",
    "Below poverty level; Estimate; Population for whom poverty status is determined": "n_below_poverty_level"
}

# Load the four ACS tables; each comes back indexed by (Geography, Id).
dp03 = read_acs_file("../data/all-tracts/ACS_12_5YR_DP03_with_ann.csv", dp03_cols)

dp04 = read_acs_file("../data/all-tracts/ACS_12_5YR_DP04_with_ann.csv", dp04_cols)

dp05 = read_acs_file("../data/all-tracts/ACS_12_5YR_DP05_with_ann.csv", dp05_cols)

s1701 = read_acs_file("../data/all-tracts/ACS_12_5YR_S1701_with_ann.csv", s1701_cols)

# Keep only the renamed columns from each table and join them on the shared
# (Geography, Id) index. The dict views are materialized as lists so that
# column selection does not depend on how pandas treats a `dict.values()`
# view, and the chain is parenthesized instead of relying on trailing
# backslashes.
joined = (
    dp05[list(dp05_cols.values())]
    .join(dp04[list(dp04_cols.values())])
    .join(dp03[list(dp03_cols.values())])
    .join(s1701[list(s1701_cols.values())])
)

# Drop the first comma-separated component of each Geography label and keep
# the rest as the county key. NOTE(review): this presumes labels look like
# "<tract>, <county>, <state>" -- confirm against the data files.
joined["county"] = [
    ", ".join(name.split(", ")[1:])
    for name in joined.index.get_level_values("Geography")
]

# Select St. Louis County rows with a boolean mask; the `.ix` indexer used
# previously has been removed from pandas.
is_stl_county = joined["county"] == "St. Louis County, Missouri"

stl_county_ids = joined[is_stl_county].index.values

stl = joined.loc[is_stl_county]

def get_bwdi(tracts):
    """Return the black-white dissimilarity index for a set of tracts.

    For each row, the share of the overall "black_pop" total found in that
    row is compared with the share of the overall "white_pop" total; the
    index is half the sum of the absolute differences.

    Args:
        tracts: DataFrame with "black_pop" and "white_pop" columns.

    Returns:
        The index as a float.
    """
    def share_of_total(column):
        counts = tracts[column]
        shares = counts / counts.sum()
        # A zero group total yields inf/NaN shares; count those as 0.
        return shares.replace([np.inf, -np.inf], np.nan).fillna(0)

    gap = share_of_total("black_pop") - share_of_total("white_pop")
    return gap.abs().sum() / 2

def get_grouped_bwdi(tracts, grouper):
    """Compute the dissimilarity index and population totals per group.

    Args:
        tracts: DataFrame with "total_pop", "black_pop" and the columns
            required by get_bwdi().
        grouper: Anything accepted by DataFrame.groupby (the caller in this
            file passes the "county" column).

    Returns:
        DataFrame with one row per group and the columns "bwdi", "pop",
        "black_pop" and "p_black_pop" (black share of total population).
    """
    by_group = tracts.groupby(grouper)
    total_residents = by_group["total_pop"].sum()
    black_residents = by_group["black_pop"].sum()
    black_share = black_residents * 1.0 / total_residents
    columns = {
        "bwdi": by_group.apply(get_bwdi),
        "pop": total_residents,
        "black_pop": black_residents,
        "p_black_pop": black_share,
    }
    return pd.DataFrame(columns)

# Dissimilarity index and population totals for every county.
dissim = get_grouped_bwdi(joined, joined["county"])

# Counties with at least 20,000 residents and a black share of at least 20%.
# Take an explicit copy so the "rank" column below is added to an
# independent frame rather than to a slice of `dissim`.
large_black_populations = dissim[
    (dissim["pop"] >= 20000) & (dissim["p_black_pop"] >= 0.2)
].copy()

# Rank 1 = highest dissimilarity index.
large_black_populations["rank"] = large_black_populations["bwdi"].rank(ascending=False)
# `DataFrame.sort` no longer exists in pandas; `sort_values` is its replacement.
large_black_populations.sort_values("rank")[["rank", "bwdi", "pop", "p_black_pop"]]

def get_p_tracts_within_x_of_average(df, band=0.1):
    """Return the fraction of rows whose black share is near the overall share.

    The overall share is total "black_pop" divided by total "total_pop"
    across all rows. A row counts when the absolute difference between that
    overall share and its own black_pop / total_pop ratio is at most `band`.

    Args:
        df: DataFrame with "black_pop" and "total_pop" columns.
        band: Maximum allowed absolute difference (default 0.1).

    Returns:
        Number of qualifying rows divided by the number of rows, as a float.
    """
    overall_share = df["black_pop"].sum() * 1. / df["total_pop"].sum()
    tract_share = df["black_pop"] * 1. / df["total_pop"]
    is_close = (overall_share - tract_share).abs() <= band
    return len(df[is_close]) * 1.0 / len(df)

# Overall black share of St. Louis County's population, to 3 decimals.
round(stl["black_pop"].sum() * 1. / stl["total_pop"].sum(), 3)

# Fraction of St. Louis County tracts whose black share is within 0.1 of
# that overall share, to 3 decimals.
round(get_p_tracts_within_x_of_average(stl, 0.1), 3)

def get_p_black_pop_in_segregated(tracts, threshold=0.8):
    """Return the share of black residents living in mostly-black tracts.

    Args:
        tracts: DataFrame with "black_pop" and "total_pop" columns.
        threshold: Minimum black_pop / total_pop ratio for a row to count
            (default 0.8).

    Returns:
        Sum of "black_pop" over rows meeting the threshold, divided by the
        sum of "black_pop" over all rows.
    """
    black = tracts["black_pop"]
    black_share = black * 1.0 / tracts["total_pop"]
    in_segregated = black[black_share >= threshold].sum()
    return in_segregated * 1.0 / black.sum()

# Share of black residents living in tracts that are at least 80% black
# (the function's default threshold): first for St. Louis County only...
round(get_p_black_pop_in_segregated(stl), 3)

# ...then for every tract in the joined table.
round(get_p_black_pop_in_segregated(joined), 3)

def get_demography(df, grouper):
    """Summarize housing, employment and poverty measures per group.

    Args:
        df: DataFrame with the columns "n_occupied_units",
            "n_owner_occupied_units", "n_renter_occupied_units",
            "n_employed", "n_labor_force", "n_below_poverty_level" and
            "n_poverty_universe".
        grouper: Anything accepted by DataFrame.groupby.

    Returns:
        DataFrame with one row per group and the columns:
        "n_occupied_units" (group total), "p_owner_occupied" and
        "p_renter_occupied" (shares of occupied units), "p_unemployed"
        (1 - employed / labor force) and "p_poverty" (below poverty level /
        poverty universe).
    """
    grouped = df.groupby(grouper)
    occupied_units = grouped["n_occupied_units"].sum()
    return pd.DataFrame({
        "n_occupied_units": occupied_units,
        "p_owner_occupied": grouped["n_owner_occupied_units"].sum() * 1.0 / occupied_units,
        # The numerator must be the per-group renter total. Using the
        # whole-frame total (as before) divides one overall number by each
        # group's occupied units and yields meaningless shares.
        "p_renter_occupied": grouped["n_renter_occupied_units"].sum() * 1.0 / occupied_units,
        "p_unemployed": 1 - (grouped["n_employed"].sum() * 1.0 /
                             grouped["n_labor_force"].sum()),
        "p_poverty": grouped["n_below_poverty_level"].sum() * 1.0 /
                     grouped["n_poverty_universe"].sum(),
    })

# Split St. Louis County tracts into two groups keyed by a boolean:
# True = tract is at least 80% black, False = every other tract.
demography = get_demography(stl, (stl["black_pop"] * 1.0 / stl["total_pop"]) >= 0.8)
# Show the summary with every cell rounded to 3 decimals.
demography.applymap(lambda x: round(x, 3))

import matplotlib as mpl
import mplstyle
import mplstyle.styles.simple
%matplotlib inline

# Apply the `simple` style shipped with the third-party `mplstyle` package.
mplstyle.set(mplstyle.styles.simple)

# Then override the figure size on top of that style.
# NOTE(review): presumably (width, height) in inches, as with matplotlib's
# "figure.figsize" rcParam -- confirm against mplstyle.
mplstyle.set({
    "figure.figsize": (8, 6)
})

# X-axis labels for the two-bar charts, in the order make_chart() draws the
# bars: the True group first, then the False group.
x_tick_labels = [ "Highly Segregated Tracts", "Other Tracts" ]

def style_axis(ax, labels):
    """Apply the shared look to a bar-chart axis.

    Turns off the vertical grid lines, places one x tick per label at
    positions 0..len(labels)-1, renders y tick values as whole percentages
    (value * 100 followed by "%"), and tightens the figure layout.

    Args:
        ax: The matplotlib Axes to style.
        labels: Sequence of x tick label strings, one per bar.
    """
    # Local import: the file only does `import matplotlib as mpl`, which
    # does not by itself guarantee the `ticker` submodule is loaded.
    from matplotlib.ticker import FuncFormatter

    ax.xaxis.grid(False)
    ax.set_xticks(list(range(len(labels))))
    ax.set_xticklabels(labels, rotation=0, fontsize="xx-large")
    # Format ticks through a formatter rather than freezing label strings
    # with set_yticklabels(): fixed strings are assigned by position and go
    # stale if the tick locations change afterwards (e.g. after a re-layout).
    ax.yaxis.set_major_formatter(
        FuncFormatter(lambda value, _pos: "{0:.0f}%".format(value * 100))
    )
    mpl.pyplot.tight_layout()

def make_chart(variable, title):
    """Draw a two-bar chart comparing the True and False groups.

    Args:
        variable: Mapping/Series indexed by the booleans True and False;
            the True value is drawn first (x=0), the False value second
            (x=1).
        title: Chart title.

    Returns:
        The matplotlib Axes the bars were drawn on, after styling.
    """
    # Bar positions and heights are passed positionally: the first
    # parameter's keyword was renamed from `left` to `x` across matplotlib
    # versions, so the positional form is the one that works on all of them.
    mpl.pyplot.bar(
        [0, 1],
        (variable[True], variable[False]),
        align="center",
        width=0.6,
        color="red",
        alpha=0.8,
    )
    # gca() returns the Axes that bar() just drew on; pyplot.axes() with no
    # arguments creates a fresh Axes on current matplotlib.
    ax = mpl.pyplot.gca()
    style_axis(ax, x_tick_labels)
    ax.set_title(title, fontsize="xx-large")
    return ax

# One chart per measure, comparing highly segregated tracts with the rest
# of St. Louis County.
ax = make_chart(demography["p_poverty"], "Population Below Poverty Level (St. Louis County, 2012)\n")

ax = make_chart(demography["p_unemployed"], "Percent Unemployment (St. Louis County, 2012)\n")

# Title previously ended in a stray second ")".
ax = make_chart(demography["p_owner_occupied"], "Households Owning Home (St. Louis County, 2012)\n")

# Two bars: share of black residents living in tracts that are at least 80%
# black, for all tracts (x=0) and for St. Louis County (x=1).
# Positions and heights are positional because the `left` keyword is not
# accepted by current matplotlib.
mpl.pyplot.bar(
    [0, 1],
    (get_p_black_pop_in_segregated(joined), get_p_black_pop_in_segregated(stl)),
    align="center",
    width=0.6,
    color="red",
    alpha=0.8,
)
# gca() returns the Axes the bars were drawn on; pyplot.axes() with no
# arguments creates a fresh Axes on current matplotlib.
ax = mpl.pyplot.gca()
style_axis(ax, [ "Nationwide", "St. Louis County" ])
ax.set_title("Percentage of Black Residents Who Live In \nCensus Tracts That Are At Least 80% Black\n", fontsize="xx-large")
# Trailing no-op so the notebook cell does not echo the last expression.
pass