#!/usr/bin/env python
# coding: utf-8

# ## 2022 SAT SCORE DISTRIBUTION BY STATE

# In[152]:


import pandas as pd
import geopandas as gpd
import folium
from folium import plugins
from folium.plugins import StripePattern
import numpy as np


# First, get Folium's shape files, which supply mappable state locations:

# In[153]:


state_geo = "https://raw.githubusercontent.com/python-visualization/folium/main/examples/data/us-states.json"
geoJSON_df = gpd.read_file(state_geo)
geoJSON_df.head()


# Now we'll merge the shapes with some columns of SAT data to get everything
# into one dataframe:

# In[154]:


# Rename the shape file's "id" column so it carries the same key name
# ("stateabbr") that the SAT CSV is selected and merged on below.
geoJSON_df = geoJSON_df.rename(columns={"id": "stateabbr"})

state_scores = "https://raw.githubusercontent.com/NickKrausStack/SATdata/main/States.csv"
# Read the CSV and keep only the join key plus the two measures that are mapped.
df = pd.read_csv(state_scores)[["stateabbr", "TotalMean", "Participation"]]

# Default (inner) merge: only rows whose "stateabbr" appears on both sides remain.
final_df = geoJSON_df.merge(df, on="stateabbr")
final_df.head()


# And now we are ready to leverage Folium to produce a choropleth graph, which will provide an interactive account of averages by state.
# In[155]:


def _build_state_map(states, value_column, legend_name, highlight_opacity):
    """Build a state-level choropleth with a hover tooltip layer.

    Args:
        states: Dataframe passed to Folium both as the geometry source and
            as the data source. It must have the "stateabbr" and "name"
            columns plus ``value_column``.
        value_column: Name of the column that colours the states and is
            shown next to the state name in the tooltip.
        legend_name: Caption for the colour legend.
        highlight_opacity: Fill opacity applied to a state while hovered.

    Returns:
        A ``(map, hover_layer)`` tuple: the ``folium.Map`` and the
        transparent ``GeoJson`` overlay that provides the tooltips.
    """
    # "OpenStreetMap" is Folium's documented name for its default tileset.
    # The earlier spelling "openstreet map" is not a documented name and
    # may be rejected by newer Folium releases.
    state_map = folium.Map(location=[50, -102], zoom_start=2.5, tiles="OpenStreetMap")

    folium.Choropleth(
        geo_data=states,
        data=states,
        columns=["stateabbr", value_column],
        key_on="feature.properties.stateabbr",
        fill_color="RdYlGn",
        fill_opacity=0.5,
        line_opacity=0.2,
        legend_name=legend_name,
    ).add_to(state_map)

    def style_function(feature):
        # Nearly invisible overlay: it exists only to carry hover/tooltips.
        return {
            'fillColor': '#ffffff',
            'color': '#000000',
            'fillOpacity': 0.1,
            'weight': 0.1,
        }

    def highlight_function(feature):
        # Darken the state under the cursor.
        return {
            'fillColor': '#000000',
            'color': '#000000',
            'fillOpacity': highlight_opacity,
            'weight': 0.1,
        }

    hover_layer = folium.features.GeoJson(
        data=states,
        style_function=style_function,
        control=False,
        highlight_function=highlight_function,
        tooltip=folium.features.GeoJsonTooltip(
            fields=['name', value_column],
            aliases=['name', value_column],
            style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;"),
        ),
    )
    state_map.add_child(hover_layer)
    # Keep the tooltip overlay above the choropleth so hover events reach it.
    state_map.keep_in_front(hover_layer)
    return state_map, hover_layer


m, NIL = _build_state_map(final_df, "TotalMean", "SAT Scores", highlight_opacity=0.50)
m


# Similar code to create the choropleth graph comparing participation by state:

# In[156]:


m, NIL = _build_state_map(
    final_df, "Participation", "SAT Participation Rate", highlight_opacity=0.20
)
m


# Use Pandas functionality to calculate separate averages for the cluster states
# and the rest of the states:

# In[151]:


# The variables say "midwest", but the printed labels below call this group
# the "cluster states"; the list is used purely as a name filter.
midweststates = [
    'Montana', 'Wyoming', 'North Dakota', 'South Dakota', 'Nebraska',
    'Kansas', 'Utah', 'Minnesota', 'Wisconsin', 'Missouri', 'Kentucky',
    'Tennessee', 'Mississippi',
]

# Compute the membership mask once and reuse it for both halves. The
# complement previously referenced an undefined name (`midwest`), which
# raised NameError before any average was printed.
in_cluster = final_df['name'].isin(midweststates)
midwestdf = final_df[in_cluster]
nomidwestdf = final_df[~in_cluster]

print('avg participation, cluster states: ', midwestdf['Participation'].mean())
print('avg participation, non cluster states: ', nomidwestdf['Participation'].mean())


# Now we produce the meanscore/participation line graph using matplotlib.

# In[141]:


import matplotlib.pyplot as plt

# The figure size must be set before the first plotting call: pyplot reads
# this rcParam when it creates the figure, so setting it afterwards does
# not resize the plot being drawn.
plt.rcParams['figure.figsize'] = [3, 3]

# Sort by the x variable so the line runs left to right instead of jumping
# between points in the dataframe's row order.
plot_df = df.sort_values('Participation')
plt.plot(plot_df['Participation'], plot_df['TotalMean'])
plt.xlabel('Participation rate')
plt.ylabel('MeanScore')
plt.show()