from lets_plot import *
from lets_plot.geo_data import *
from lets_plot.settings_utils import geocoding_service
#LetsPlot.set(geocoding_service(url='http://3.86.228.157:3025'))
import pandas as pd
LetsPlot.setup_html()
The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).
# Load the 2017 US household income dataset; the CSV is decoded as Latin-1.
income_all = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/US_household_income_2017.csv',
    encoding='latin-1',
)
# Preview the first three records.
income_all.head(3)
id | State_Code | State_Name | State_ab | County | City | Place | Type | Primary | Zip_Code | Area_Code | ALand | AWater | Lat | Lon | Mean | Median | Stdev | sum_w | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1011000 | 1 | Alabama | AL | Mobile County | Chickasaw | Chickasaw city | City | place | 36611 | 251 | 10894952 | 909156 | 30.771450 | -88.079697 | 38773 | 30506 | 33101 | 1638.260513 |
1 | 1011010 | 1 | Alabama | AL | Barbour County | Louisville | Clio city | City | place | 36048 | 334 | 26070325 | 23254 | 31.708516 | -85.611039 | 37725 | 19528 | 43789 | 258.017685 |
2 | 1011020 | 1 | Alabama | AL | Shelby County | Columbiana | Columbiana city | City | place | 35051 | 205 | 44835274 | 261034 | 33.191452 | -86.615618 | 54606 | 31930 | 57348 | 926.031000 |
# Average the `Mean` income column over all records of each state.
# as_index=False keeps `State_Name` as a regular column of the result.
income_by_state = (
    income_all
    .groupby(by="State_Name", as_index=False)["Mean"]
    .mean()
)
# Preview the first three states.
income_by_state.head(3)
State_Name | Mean | |
---|---|---|
0 | Alabama | 53612.925856 |
1 | Alaska | 77670.209524 |
2 | Arizona | 62578.071313 |
# Average the `Mean` income column per (state, county) pair; the state is part
# of the key, so rows are grouped by both columns together.
income_by_county = (
    income_all
    .groupby(by=["State_Name", "County"], as_index=False)["Mean"]
    .mean()
)
# Preview the first three counties.
income_by_county.head(3)
State_Name | County | Mean | |
---|---|---|---|
0 | Alabama | Autauga County | 53735.557235 |
1 | Alabama | Barbour County | 37725.000000 |
2 | Alabama | Blount County | 55127.000000 |
# Names of the states matched by the 'us-48' request, as reported in the
# 'found name' column of the geocoding result.
# `regions_state` is not defined in the environment this notebook was last run
# in (the recorded run raised NameError), so its successor `geocode_states` is
# used; `get_geocodes()` returns the result table that `to_data_frame()`
# used to provide.
us48 = geocode_states('us-48').get_geocodes()['found name'].tolist()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) C:\Temp/ipykernel_10588/1550784517.py in <module> ----> 1 us48 = regions_state('us-48').to_data_frame()['found name'].tolist() NameError: name 'regions_state' is not defined
# Keep only the county rows whose state name appears in `us48`.
data = income_by_county[income_by_county.State_Name.isin(us48)]
# Number of remaining rows; reused further down to size a per-row list.
row_count = len(data)
print(row_count)
# Geocode every county together with its parent state; `names` and `states`
# are parallel lists taken from the same rows of `data`.
# NOTE(review): regions_builder2 looks like a prototype API - confirm it is
# available in the installed lets-plot version.
counties = (
    regions_builder2(
        'county',
        names=data["County"].tolist(),
        states=data["State_Name"].tolist(),
    )
    .drop_not_matched()
    .build()
)
# Show the geocoding result as a table.
counties.to_data_frame()
# Centroid geometry for the geocoded counties.
centroids = counties.centroids()
centroids
# map_join is lacking multi-key support, so the income data is joined with
# pandas.merge on the (county, state) pair instead.
data_with_geometry = centroids.merge(
    data,
    left_on=['request', 'state'],
    right_on=['County', 'State_Name'],
)
data_with_geometry
# `data` is passed by keyword: lets-plot geoms take only `mapping`
# positionally, so a second positional argument is rejected.
ggplot() + geom_point(aes(color='Mean'), data=data_with_geometry)
# Boundary geometry for the geocoded counties.
boundaries = counties.boundaries()
boundaries
# map_join is lacking multi-key support, so the income data is joined with
# pandas.merge on the (county, state) pair instead.
data_with_boundaries = boundaries.merge(
    data,
    left_on=['request', 'state'],
    right_on=['County', 'State_Name'],
)
data_with_boundaries
# Hide all axis decorations and fix the plot size to 900x400.
map_theme = (
    theme(axis_line="blank", axis_text="blank", axis_title="blank", axis_ticks="blank")
    + ggsize(900, 400)
)
# Choropleth of mean income. `data` is passed by keyword: lets-plot geoms take
# only `mapping` positionally, so a second positional argument is rejected.
(
    ggplot()
    + geom_map(aes(fill='Mean'), data=data_with_boundaries)
    + scale_fill_gradient(low="#007BCD", high="#FE0968", name="Mean income")
    + map_theme
)
Issues
# Issue: drop_not_found breaks parents - the parent columns are missing from
# the result. The second name is deliberately one that does not exist.
# NOTE(review): other cells call drop_not_matched(); confirm drop_not_found()
# is the intended method here.
(
    regions_builder2(
        'county',
        names=['Wayne County', 'Not existing County', 'Anson County'],
        states=['New York', 'New York', 'North Carolina'],
        countries=['usa', 'usa', 'usa'],
    )
    .drop_not_found()
    .build()
)
# Issue with parents geocoding - unexpected ranking behaviour results in
# broken responses. When multiple objects are found by one request, an
# ambiguous response is generated without ranking by weight.
# The ambiguous response is also broken - it returns a success response with
# the first namesake object ¯\_(ツ)_/¯
(
    regions_builder2(
        'county',
        names=['Wayne County', 'Essex County'],
        states=['New York', 'Virginia'],
        countries=['usa', 'usa'],
    )
    .build()
)
# Issue: the error message is not informative. Here `countries` has a single
# entry while `names` and `states` have two each.
(
    regions_builder2(
        'county',
        names=['Wayne County', 'Essex County'],
        states=['New York', 'Virginia'],
        countries=['usa'],
    )
    .build()
)
# Issue: passing already-geocoded regions as a parent is not yet supported.
# First geocode the states (one country entry per row of `data`) ...
state_regions = regions_builder2(
    'state',
    names=data["State_Name"].tolist(),
    countries=['uSa'] * row_count,
).build()
# ... then try to use the resulting regions object as the `states` parent.
counties_via_regions = (
    regions_builder2(
        'county',
        names=data["County"].tolist(),
        states=state_regions,
    )
    .drop_not_matched()
    .build()
)
counties_via_regions.to_data_frame()
# Look up the state name 'florida' with the search limited by `scope`.
regions_builder2(
    'state',
    names=['florida'],
    scope='Uruguay',
).build()
# Look up the same name twice, each time under a different parent country.
regions_builder2(
    'state',
    names=['florida', 'florida'],
    countries=['usa', 'Uruguay'],
).build()