#!/usr/bin/env python
# coding: utf-8

# ## 2022 SAT SCORE DISTRIBUTION BY STATE

# In[152]:


import pandas as pd
import geopandas as gpd
import folium 
from folium import plugins
from folium.plugins import StripePattern
import numpy as np


# First, get Folium's shape files, which supply mappable state locations:

# In[153]:


# URL of the us-states GeoJSON file in folium's example data.
state_geo = "https://raw.githubusercontent.com/python-visualization/folium/main/examples/data/us-states.json"

# Load the GeoJSON into a dataframe and preview its first rows.
geoJSON_df = gpd.read_file(state_geo)
geoJSON_df.head()


# Now we'll merge the shapes with some columns of SAT data to get everything into one dataframe:

# In[154]:


# URL of the per-state SAT results CSV.
state_scores = "https://raw.githubusercontent.com/NickKrausStack/SATdata/main/States.csv"

# Rename the shape table's "id" column so both tables share the
# "stateabbr" join key.
geoJSON_df = geoJSON_df.rename(columns={"id": "stateabbr"})

# Read the SAT table, keeping only the join key and the two score columns.
df = pd.read_csv(state_scores)[["stateabbr", "TotalMean", "Participation"]]

# Join shapes and scores on the shared key and preview the result.
final_df = geoJSON_df.merge(df, on="stateabbr")
final_df.head()


# And now we are ready to leverage Folium to produce a choropleth graph, which will provide an interactive account of averages by state.

# In[155]:


# Base map centred at latitude 50, longitude -102.
# "OpenStreetMap" is folium's canonical built-in tile name; the previous
# spelling "openstreet map" is not a recognised tile set name.
m = folium.Map(location=[50, -102], zoom_start=2.5, tiles="OpenStreetMap")

# Shade each state by its TotalMean value, matching rows to shapes through
# the shared "stateabbr" property.
folium.Choropleth(
    geo_data=final_df,
    data=final_df,
    columns=["stateabbr", "TotalMean"],
    key_on="feature.properties.stateabbr",
    fill_color="RdYlGn",
    fill_opacity=0.5,
    line_opacity=0.2,
    legend_name="SAT Scores",
).add_to(m)


def style_function(_feature):
    """Return the resting style for the tooltip overlay (same for every feature)."""
    return {
        'fillColor': '#ffffff',
        'color': '#000000',
        'fillOpacity': 0.1,
        'weight': 0.1,
    }


def highlight_function(_feature):
    """Return the style applied to a feature while it is highlighted."""
    return {
        'fillColor': '#000000',
        'color': '#000000',
        'fillOpacity': 0.50,
        'weight': 0.1,
    }


# Nearly transparent GeoJson layer drawn over the choropleth; it carries the
# tooltip showing each state's name and TotalMean.
NIL = folium.features.GeoJson(
    data=final_df,
    style_function=style_function,
    control=False,
    highlight_function=highlight_function,
    tooltip=folium.features.GeoJsonTooltip(
        fields=['name', 'TotalMean'],
        aliases=['name', 'TotalMean'],
        style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;"),
    ),
)
m.add_child(NIL)
# Keep the tooltip layer above the choropleth.
m.keep_in_front(NIL)

m


# Similar code to create the choropleth graph comparing participation by state:

# In[156]:


# Base map centred at latitude 50, longitude -102.
# "OpenStreetMap" is folium's canonical built-in tile name; the previous
# spelling "openstreet map" is not a recognised tile set name.
m = folium.Map(location=[50, -102], zoom_start=2.5, tiles="OpenStreetMap")

# Shade each state by its Participation value, matching rows to shapes
# through the shared "stateabbr" property.
folium.Choropleth(
    geo_data=final_df,
    data=final_df,
    columns=["stateabbr", "Participation"],
    key_on="feature.properties.stateabbr",
    fill_color="RdYlGn",
    fill_opacity=0.5,
    line_opacity=0.2,
    legend_name="SAT Participation Rate",
).add_to(m)


def style_function(_feature):
    """Return the resting style for the tooltip overlay (same for every feature)."""
    return {
        'fillColor': '#ffffff',
        'color': '#000000',
        'fillOpacity': 0.1,
        'weight': 0.1,
    }


def highlight_function(_feature):
    """Return the style applied to a feature while it is highlighted."""
    return {
        'fillColor': '#000000',
        'color': '#000000',
        'fillOpacity': 0.20,
        'weight': 0.1,
    }


# Nearly transparent GeoJson layer drawn over the choropleth; it carries the
# tooltip showing each state's name and Participation.
NIL = folium.features.GeoJson(
    data=final_df,
    style_function=style_function,
    control=False,
    highlight_function=highlight_function,
    tooltip=folium.features.GeoJsonTooltip(
        fields=['name', 'Participation'],
        aliases=['name', 'Participation'],
        style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;"),
    ),
)
m.add_child(NIL)
# Keep the tooltip layer above the choropleth.
m.keep_in_front(NIL)

m


# Use Pandas functionality to calculate separate averages for the cluster states and the rest of the states:

# In[151]:


# Names of the states that make up the cluster being compared with the rest.
midweststates = [
    'Montana', 'Wyoming', 'North Dakota', 'South Dakota', 'Nebraska',
    'Kansas', 'Utah', 'Minnesota', 'Wisconsin', 'Missouri', 'Kentucky',
    'Tennessee', 'Mississippi',
]

# Build the membership mask once and negate it for the complement, so the two
# frames always partition final_df. (The complement previously referenced an
# undefined name, `midwest`, which raised NameError.)
in_cluster = final_df['name'].isin(midweststates)
midwestdf = final_df[in_cluster]
nomidwestdf = final_df[~in_cluster]

print('avg participation, cluster states: ', midwestdf['Participation'].mean())
print('avg participation, non cluster states: ', nomidwestdf['Participation'].mean())


# Now we produce the meanscore/participation line graph using matplotlib.

# In[141]:


import matplotlib.pyplot as plt

# Set the default figure size BEFORE plotting: the rcParam only applies to
# figures created after it is changed, so setting it after plt.plot() would
# not resize this figure.
plt.rcParams['figure.figsize'] = [3, 3]

# plt.plot joins points in row order, so sort by participation first to get a
# line that runs left to right instead of zigzagging between states.
by_participation = df.sort_values('Participation')
plt.plot(by_participation['Participation'], by_participation['TotalMean'])
plt.xlabel('Participation rate')
plt.ylabel('MeanScore')
plt.show()