#!/usr/bin/env python
# coding: utf-8
# # Visualizing Spatial Information - California Housing
#
# This demo shows a simple workflow when working with geospatial data:
#
# * Obtaining a dataset which includes geospatial references.
# * Obtaining the desired geometries (boundaries etc.)
# * Visualisation
#
# In this example we will make a simple **proportional symbols map** using the `California Housing` dataset from the `sklearn` package.
# In[1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from lets_plot import *
# In[2]:
# Set up Lets-Plot's HTML output; called once, before any plot is built.
LetsPlot.setup_html()
# ## Prepare the dataset
# In[3]:
from sklearn.datasets import fetch_california_housing

# Fetch the California Housing data set and wrap its feature matrix in a
# DataFrame whose columns are named after the features.
california_housing_bunch = fetch_california_housing()
data = pd.DataFrame(
    data=california_housing_bunch.data,
    columns=california_housing_bunch.feature_names,
)

# Add a dollar-value column to the dataframe.
# `target` is a numpy array of shape (20640,); each value is a house value
# expressed in units of 100,000, so it is scaled up to plain dollars here.
data['Value($)'] = california_housing_bunch.target * 100_000
data.head()
# In[4]:
# Draw a random sample of 1000 rows from the data set.
# The fixed seed keeps the sample -- and therefore every map drawn from it --
# identical from one run to the next.
data = data.sample(n=1000, random_state=42)
# ## Static map
#
# Let's create a static map using regular `ggplot2` geometries.
#
# Various shape files related to the state of California are available on the https://data.ca.gov website.
#
# For the purpose of this demo, the California State Boundary zip was downloaded from
# https://data.ca.gov/dataset/ca-geographic-boundaries and unpacked to the `ca-state-boundary` subdirectory.
# ### Use `geopandas` to read a shape file to GeoDataFrame
# In[5]:
# Alternative: read the boundary from the unpacked shape file instead:
#   CA = gpd.read_file("./ca-state-boundary/CA_State_TIGER2016.shp")
from lets_plot.geo_data import *

# Geocode the state 'CA' within the 'US' scope and fetch its boundary
# geometry, with the resolution raised via `inc_res(2)`.
CA = (
    geocode_states('CA')
    .scope('US')
    .inc_res(2)
    .get_boundaries()
)
CA.head()
# Keeping in mind that our target is the housing value, draw the state contours using the `geom_polygon()` function and plot the housing data on top.
# ### Make a plot out of polygon and points
#
# The color of the points will reflect the house age and
# the size of the points will reflect the value of the house.
# In[6]:
# Base plot: carries the colour scale reused by every map built from `p`.
p = ggplot() + scale_color_gradient(name='House Age', low='red', high='green')

# Points layer: one point per row of `data`, positioned by longitude/latitude,
# sized by house value and coloured by house age.
points = geom_point(
    aes(x='Longitude', y='Latitude', size='Value($)', color='HouseAge'),
    data=data,
    alpha=0.8,
)

# Static map: the state outline first, then the points on top of it.
(
    p
    + geom_polygon(data=CA, fill='#F8F4F0', color='#B71234')
    + points
    + theme_void()
    + ggsize(600, 500)
)
# ## Interactive map
#
# The `geom_livemap()` function creates an interactive base-map super-layer to which other geometry layers are added.
# ### Configuring map tiles
#
# By default *Lets-Plot* offers high-quality vector map tiles, but it can also fetch raster tiles from 3rd-party Z-X-Y [tile servers](https://wiki.openstreetmap.org/wiki/Tile_servers).
#
# For the sake of the demo, let's use *CARTO Antique* tiles by [CARTO](https://carto.com/attribution/) as our basemap.
# In[7]:
# Register raster tiles addressed by a {z}/{x}/{y} URL template as the
# basemap setting, together with the attribution text.
LetsPlot.set(
    maptiles_zxy(
        url='https://cartocdn_c.global.ssl.fastly.net/base-antique/{z}/{x}/{y}@2x.png',
        attribution='© OpenStreetMap contributors © CARTO, © CARTO',
    )
)
# ### Make a plot similar to the one above but interactive
# In[8]:
# Interactive map: the live basemap, then the semi-transparent state
# polygon, then the points layer.
(
    p
    + geom_livemap()
    + geom_polygon(data=CA, fill='white', color='#B71234', alpha=0.5)
    + points
)
# ### Adjust the initial viewport
#
# Use parameters `location` and `zoom` to define the initial viewport.
# In[9]:
# `location` takes a `[lon, lat]` pair; this one is near Los Angeles.
# `zoom` sets the initial zoom level of the viewport.
(
    p
    + geom_livemap(location=[-118.15, 33.96], zoom=7)
    + geom_polygon(data=CA, fill='white', color='#B71234', alpha=0.5, size=1)
    + points
)