import geopandas as gpd, folium, branca
from folium.plugins import Search
# Show which folium build this notebook was run against.
folium.__version__
'0.7.0+258.geb90e06'
Let's get some JSON data from the web - both a point layer and a polygon GeoJson dataset with some population data.
# Remote GeoJSON sources: a US states polygon layer and a populated-places
# point layer.
STATES_URL = "https://rawcdn.githack.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json"
CITIES_URL = "https://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_50m_populated_places_simple.geojson"

# Read each dataset straight from its URL into a GeoDataFrame.
states = gpd.read_file(STATES_URL, driver='GeoJSON')
cities = gpd.read_file(CITIES_URL, driver='GeoJSON')
And take a look at what our data looks like:
# Summary statistics (count, mean, std, min, quartiles, max) for the states layer.
states.describe()
density | |
---|---|
count | 52.000000 |
mean | 402.504404 |
std | 1395.100812 |
min | 1.264000 |
25% | 53.440000 |
50% | 100.335000 |
75% | 234.050000 |
max | 10065.000000 |
Look how far the minimum and maximum values for the density are from the top and bottom quartile breakpoints! We have some outliers in our data that are well outside the meat of most of the distribution. Let's look into this to find the culprits within the sample.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported way to stack the two slices.
import pandas as pd

# Rank the states from most to least dense, then show both extremes:
# the five densest rows followed by the five sparsest.
states_sorted = states.sort_values(by='density', ascending=False)
pd.concat([states_sorted.head(5), states_sorted.tail(5)])[['name', 'density']]
name | density | |
---|---|---|
8 | District of Columbia | 10065.000 |
30 | New Jersey | 1189.000 |
51 | Puerto Rico | 1082.000 |
39 | Rhode Island | 1006.000 |
21 | Massachusetts | 840.200 |
31 | New Mexico | 17.160 |
34 | North Dakota | 9.916 |
26 | Montana | 6.858 |
50 | Wyoming | 5.851 |
1 | Alaska | 1.264 |
Looks like Washington D.C. and Alaska were the culprits on each end of the range. However, Washington D.C. was much further above the next most dense state, New Jersey, than the least dense state, Alaska, was below Wyoming. Washington D.C. has a relatively small land area for the number of people that live there, so it makes sense that it's pretty dense. And Alaska has a lot of land area, but not much of it is habitable for humans.
However, we're looking at all of the states in the US to look at things on a more regional level. That high figure at the top of our range for Washington D.C. will really hinder the ability for us to differentiate between the other states, so let's account for that in the min and max values for our color scale, by getting the quantile values close to the end of the range. Anything higher or lower than those values will just fall into the 'highest' and 'lowest' bins for coloring.
# Clip the colour-scale range to the 5th and 95th density percentiles
# (rounded to two decimals) so the extreme values do not stretch the scale.
# NOTE(review): `min` and `max` shadow the Python builtins; the names are
# kept because the colormap cell further down reads them.
min, max = (round(q, 2) for q in states['density'].quantile([0.05, 0.95]))
mean = round(states['density'].mean(), 2)

# One value per paragraph: Min, Max, Mean.
print("\n\n".join([f"Min: {min}", f"Max: {max}", f"Mean: {mean}"]))
Min: 8.54 Max: 1040.2 Mean: 402.5
This looks better. Our min and max values for the colorscale are much closer to the mean value now. Let's run with these values, and make a colorscale. I'm just going to use a sequential light-to-dark color palette from ColorBrewer.
# Sequential light-to-dark palette used to shade states by density.
palette = ['#f2f0f7', '#cbc9e2', '#9e9ac8', '#756bb1', '#54278f']

# Colour breakpoints sit at the 20th/40th/60th/80th density percentiles.
# NOTE(review): there are five colours but only four breakpoints; branca's
# documentation describes `index` as having the same length as `colors` --
# confirm this mismatch is intended.
breaks = states['density'].quantile([0.2, 0.4, 0.6, 0.8])

# `min` and `max` here are the clipped density bounds computed earlier in the
# notebook, not the Python builtins.
colormap = branca.colormap.LinearColormap(colors=palette, index=breaks, vmin=min, vmax=max)
colormap.caption = "Population Density in the United States"

# Display the colour ramp as the cell output.
colormap
Let's narrow down these cities to United States cities, by using GeoPandas' spatial join functionality between two GeoDataFrame objects, using the Point 'within' Polygon functionality.
# Keep only the cities whose point lies within one of the state polygons.
# The `op` keyword of sjoin was deprecated in geopandas 0.10 and removed in
# 1.0; `predicate` is its replacement and takes the same values.
us_cities = gpd.sjoin(cities, states, how='inner', predicate='within')

# Rank by 'pop_max' (largest first) and keep the top 20, retaining only the
# columns the map needs.
ranked = us_cities.sort_values(by='pop_max', ascending=False)
pop_ranked_cities = ranked[['nameascii', 'pop_max', 'geometry']].iloc[:20]
Ok, now we have a new GeoDataFrame with our top 20 populated cities. Let's see the top 5.
# Preview the first five rows of the ranked cities.
pop_ranked_cities.head(5)
nameascii | pop_max | geometry | |
---|---|---|---|
1224 | New York | 19040000 | POINT (-73.98196278740681 40.75192492259464) |
1222 | Los Angeles | 12500000 | POINT (-118.1819263699404 33.99192410876543) |
1186 | Chicago | 8990000 | POINT (-87.75200083270931 41.83193651927843) |
1184 | Miami | 5585000 | POINT (-80.22605193945003 25.78955655502153) |
1076 | Philadelphia | 5492000 | POINT (-75.17194183200792 40.00191902252647) |
Alright, let's build a map!
# Base map, positioned at latitude 38, longitude -97.
m = folium.Map(location=[38, -97], zoom_start=4)


def _state_style(feature):
    """Return the style dict for one state feature, filled by its density."""
    return {
        'fillColor': colormap(feature['properties']['density']),
        'color': 'black',
        'weight': 2,
        'fillOpacity': 0.5,
    }


# Polygon layer: states shaded by density, with a name/density tooltip.
state_tooltip = folium.GeoJsonTooltip(
    fields=['name', 'density'],
    aliases=['State', 'Density'],
    localize=True,
)
stategeo = folium.GeoJson(
    states,
    name='US States',
    style_function=_state_style,
    tooltip=state_tooltip,
)
stategeo.add_to(m)

# Point layer: the ranked cities, with a name/population tooltip.
city_tooltip = folium.GeoJsonTooltip(
    fields=['nameascii', 'pop_max'],
    aliases=['', 'Population Max'],
    localize=True,
)
citygeo = folium.GeoJson(
    pop_ranked_cities,
    name='US Cities',
    tooltip=city_tooltip,
)
citygeo.add_to(m)
# Search box over the state polygons, matched on 'name' and shown expanded.
statesearch = Search(
    layer=stategeo,
    geom_type='Polygon',
    placeholder="Search for a US State",
    collapsed=False,
    search_label='name',
    weight=3,
)
statesearch.add_to(m)

# Search box over the city points, matched on 'nameascii' and shown collapsed.
citysearch = Search(
    layer=citygeo,
    geom_type='Point',
    placeholder="Search for a US City",
    collapsed=True,
    search_label='nameascii',
)
citysearch.add_to(m)

# Layer toggle control, then the colour-scale legend.
folium.LayerControl().add_to(m)
colormap.add_to(m)

# Render the finished map as the cell output.
m