This notebook uses changes in NDVI, EVI or Fractional Cover to identify land change. The algorithm identifies a "baseline" and "analysis" time period and then compares the spectral parameters in each of those time periods. Significant reductions in vegetation are coincident with land change. In some cases these changes could be deforestation. Users of this algorithm should not accept the accuracy of the results but should conduct ground validation testing to assess accuracy. In most cases, these algorithms can be used to identify clusters of pixels that have experienced change and allow targeted investigation of those areas by local or regional governments.
import warnings
# Supress Warning
warnings.filterwarnings('ignore')
# Load Data Cube Configuration
import datacube
dc = datacube.Datacube(app = 'my_app')#, config = '/home/localuser/.datacube.conf')
# Import Data Cube API
import utils.data_cube_utilities.data_access_api as dc_api
api = dc_api.DataAccessApi()#config = '/home/localuser/.datacube.conf')
dc = api.dc
## LS8 Caqueta
# Latitude: (0.000134747292617865, 1.077843593651382)
# Longitude: (-74.91935994831539, -73.30266193148462)
# '2013-04-13', '2018-03-26'
# Resolution: (-0.000269494585236, 0.000269494585236)
## LS8 Vietnam
# Latitude: (10.513927001104687, 12.611133863411238)
# Longitude: (106.79005909290998, 108.91906631627438)
# '2014-01-14', '2016-12-21'
# Resolution: (-0.000269494585236, 0.000269494585236)
## LS7 Caqueta
# Latitude: (0.000134747292617865, 1.077843593651382)
# Longitude: (-74.91935994831539, -73.30266193148462)
# '1999-08-21', '2018-03-25'
# Resolution: (-0.000269494585236, 0.000269494585236)
## LS7 Lake Baringo
# Latitude: (0.4997747685, 0.7495947795)
# Longitude: (35.9742163305, 36.473586859499996)
# '2005-01-08', '2016-12-24'
# Resolution: (-0.000269493, 0.000269493)
# CHANGE HERE >>>>>>>>>>>>>>>>>
# Select a Product and Platform
#product = ls7_collection1_AMA_ingest
#platform = "LANDSAT_7"
product = 'ls7_collection1_AMA_ingest'
platform = "LANDSAT_7"
output_crs = 'EPSG:4326'
resolution = (-0.000269494585236, 0.000269494585236)
# CHANGE HERE >>>>>>>>>>>>>>>>>
# Select an analysis region (Lat-Lon) within the extents listed above.
# HINT: Keep your region small (<0.5 deg square) to avoid memory overload issues
# Select a time period (Min-Max) within the extents listed above (Year-Month-Day)
# This region and time period will be used for the cloud assessment
#Sub-region selection
latitude = (1.0684, 0.8684)
longitude = (-74.8409, -74.6409)
## The code below renders a map that can be used to orient yourself with the region.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude, longitude = longitude)
from datetime import datetime
# CHANGE HERE >>>>>>>>>>>>>>>>>>>>
# Select the start and end periods for your analysis products
# The datetime function is (Year,Month,Day)
# These time windows will be used to make a mosaic, so typically pick a year length or more
# Be sure to evaluate the RGB mosaics to affirm they are not full of clouds
# Select the baseline time period (start and end)
baseline_time_period = (datetime(2000,1,1), datetime(2001,1,1))
# Select the analysis time period (start and end)
analysis_time_period = (datetime(2016,1,1), datetime(2017,6,1))
# CHANGE HERE >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# Select the cloud-free mosaic type
# Options are: max_ndvi, min_ndvi, median, most_recent_pixel, geomedian
# If a geomedian is selected, it will take much longer to process
# It is most common to use the max_ndvi or median for these land change analyses
# HINT: Consider max_ndvi mosaics for NDVI analyses and median mosaics for EVI analyses
baseline_mosaic_function = "median"
analysis_mosaic_function = "median"
Load Data ( Baseline, Analysis)¶
import datacube
name = "land change"
version = 1
dc = datacube.Datacube(app = "{}_v{}".format(name, version))#, #config = '/home/localuser/.datacube.conf')
baseline_ds = dc.load(
latitude = latitude,
longitude = longitude,
time = baseline_time_period,
measurements = ["red", "green", "blue", "nir", "swir1", "swir2", "pixel_qa"],
product = product,
platform = platform,
output_crs = output_crs,
resolution = resolution
)
analysis_ds = dc.load(
latitude = latitude,
longitude = longitude,
time = analysis_time_period,
measurements = ["red", "green", "blue", "nir", "swir1", "swir2", "pixel_qa"],
product = product,
platform = platform,
output_crs = output_crs,
resolution = resolution
)
Check if loads are valid¶
import xarray as xr
def is_dataset_empty(ds:xr.Dataset) -> bool:
checks_for_empty = [
lambda x: len(x.dims) == 0, #Dataset has no dimensions
lambda x: len(x.data_vars) == 0 #Dataset no variables
]
for f in checks_for_empty:
if f(ds) == True:
return True
return False
if is_dataset_empty(baseline_ds): raise Exception("DataCube Load returned an empty Dataset." +
"Please check load parameters for Baseline Dataset!")
if is_dataset_empty(analysis_ds): raise Exception("DataCube Load returned an empty Dataset." +
"Please check load parameters for Analysis Dataset!")
Clean Data¶
Generating boolean masks that highlight valid pixels Pixels must be cloud-free over land or water to be considered
from utils.data_cube_utilities.dc_mosaic import ls8_unpack_qa, ls7_unpack_qa
unpack_function = {"LANDSAT_7": ls7_unpack_qa,
"LANDSAT_8": ls8_unpack_qa}
unpack = unpack_function[platform]
import numpy as np
def clean_mask(ds, unpacking_func, bands):
masks = [unpacking_func(ds, band) for band in bands]
return np.logical_or(*masks).values
baseline_clean_mask = clean_mask(baseline_ds.pixel_qa,unpack, ["clear", "water"])
analysis_clean_mask = clean_mask(analysis_ds.pixel_qa, unpack, ["clear", "water"])
baseline_ds = baseline_ds.where(baseline_clean_mask)
analysis_ds = analysis_ds.where(analysis_clean_mask)
Mosaic¶
Use clean masks in a time series composite
from utils.data_cube_utilities.dc_mosaic import create_max_ndvi_mosaic, create_min_ndvi_mosaic, create_median_mosaic, create_mosaic, create_hdmedians_multiple_band_mosaic
mosaic_function = {"median": create_median_mosaic,
"max_ndvi": create_max_ndvi_mosaic,
"min_ndvi": create_min_ndvi_mosaic,
"geomedian": create_hdmedians_multiple_band_mosaic,
"most_recent_pixel": create_mosaic}
baseline_compositor = mosaic_function[baseline_mosaic_function]
analysis_compositor = mosaic_function[analysis_mosaic_function]
baseline_composite = baseline_compositor(baseline_ds, clean_mask = baseline_clean_mask)
analysis_composite = analysis_compositor(analysis_ds, clean_mask = analysis_clean_mask)
from utils.data_cube_utilities.dc_rgb import rgb
rgb(baseline_composite, use_data_min=True, use_data_max=True)
(<Figure size 576x576 with 1 Axes>, <matplotlib.axes._subplots.AxesSubplot at 0x7f9561e8acf8>)
rgb(analysis_composite, use_data_min=True, use_data_max=True)
(<Figure size 576x576 with 1 Axes>, <matplotlib.axes._subplots.AxesSubplot at 0x7f9549ef80f0>)
Plot a spectral index using the cloud-filtered mosaic¶
def NDVI(dataset):
return (dataset.nir - dataset.red)/(dataset.nir + dataset.red)
def EVI(dataset):
return 2.5*(dataset.nir - dataset.red)/(dataset.nir + 6.0*dataset.red - 7.5*dataset.blue + 1.0)
ndvi = NDVI(baseline_composite)
evi = EVI(baseline_composite)
from utils.data_cube_utilities.dc_fractional_coverage_classifier import frac_coverage_classify
frac_classes = frac_coverage_classify(baseline_composite, clean_mask = np.ones(baseline_composite.pixel_qa.shape).astype(np.bool))
# CHANGE HERE >>>>>>>>>>>>>>>>>>
# Select a spectral index to plot by removing one of the comment tags (#) below
# Adjust the scale of the output using vmin and vmax
# (ndvi).plot(figsize=(10,10),cmap = "Greens", vmin=0.3, vmax=1.0)
(evi).plot(figsize=(10,10),cmap = "Greens", vmin=1.75, vmax=2.5)
# (frac_classes.pv).plot(figsize=(10,10),cmap = "Greens", vmin=70.0, vmax=100.0)
<matplotlib.collections.QuadMesh at 0x7f9548676908>
# CHANGE HERE >>>>>>>>>>>>>>>>>>>>
# Select a baseline threshold range for a given parameter ... which is basically a "mask"
# The analysis will only consider pixels in this range for change detection
# No data or cloud pixels will also be masked in this process
# If you want to see all change, you should select the full range of values for each parameter
# NDVI full range = 0.0 to 1.0
# EVI full range = 0.0 to 2.5
# Fractional Cover full range = 0.0 to 100.0
# If you want to mask out dense vegetation, typical of forested land, use the values below
# NDVI: 0.7 to 1.0
# EVI: 2.0 to 2.5
# Fractional Cover PV: 70.0 to 100.0
baseline_threshold_range = (2.00, 2.50)
_min, _max = baseline_threshold_range
# CHANGE HERE >>>>>>>>>>>>>
# Select the parameter to be used for thresholding ... NDVI, EVI or FC
# Remove the comment statement to use the desired parameter
# baseline_filter_mask = np.logical_and(NDVI(baseline_composite) > _min, NDVI(baseline_composite) < _max)
baseline_filter_mask = np.logical_and(EVI(baseline_composite) > _min, EVI(baseline_composite) < _max)
# baseline_filter_mask = np.logical_and(frac_classes.pv > _min, frac_classes.pv < _max)
def aspect_ratio_helper(ds, fixed_width = 12):
y,x = ds.values.shape
width = fixed_width
height = y * (fixed_width / x)
return (width, height)
import matplotlib.pyplot as plt
from matplotlib.cm import RdYlGn, Greens
RdYlGn.set_bad('black',1.)
Greens.set_bad('black',1.)
# This is the baseline threshold plot that shows GREEN pixels in the threshold range
# Pixels with values outside the threshold range are white
plt.figure(figsize = aspect_ratio_helper(baseline_filter_mask))
baseline_filter_mask.plot(cmap = "Greens")
<matplotlib.collections.QuadMesh at 0x7f95485bf240>
baseline_composite = baseline_composite.where(baseline_filter_mask)
from utils.data_cube_utilities.dc_fractional_coverage_classifier import frac_coverage_classify
# CHANGE HERE >>>>>>>>>>>>>
# Select the parameter to be used for the analysis
# You will select two lines for each parameter (baseline and analysis)
# Select these two lines for NDVI
# parameter_baseline_composite = NDVI(baseline_composite)
# parameter_analysis_composite = NDVI(analysis_composite)
# Select these two lines for EVI
parameter_baseline_composite = EVI(baseline_composite)
parameter_analysis_composite = EVI(analysis_composite)
# Select these two lines for Fractional Cover (FC)
# parameter_baseline_composite = frac_coverage_classify(baseline_composite, clean_mask = np.ones(baseline_composite.pixel_qa.shape).astype(np.bool))
# parameter_analysis_composite = frac_coverage_classify(analysis_composite, clean_mask = np.ones(analysis_composite.pixel_qa.shape).astype(np.bool))
# CHANGE HERE >>>>>>>>>>>>>
# Select this line for NDVI or EVI
parameter_anomaly = parameter_analysis_composite - parameter_baseline_composite
# Select this line for Fractional Cover (FC), where PV = Photosynthetic Vegetation
# parameter_anomaly = parameter_analysis_composite.pv - parameter_baseline_composite.pv
# Plot the parameter change from the baseline to the analysis period
# Significant loss in vegetation will be shown in RED
# Gains in vegetation will be shown in GREEN
plt.figure(figsize = aspect_ratio_helper(parameter_anomaly))
parameter_anomaly.plot(cmap = RdYlGn)
<matplotlib.collections.QuadMesh at 0x7f95484e9ba8>
# CHANGE HERE >>>>>>>>>>>>>>>>>>>>
# Select an Anomaly Threshold Range to filter the results from the previous image
# Be sure to put the smallest value in the "minimum_change" location (be careful of negative values)
# Losses are typically in these ranges for deforestation
# NDVI: -0.2 to -0.7
# EVI: -0.5 to -1.75
# Fractional Cover PV: -20 to -70
minimum_change = -1.75
maximum_change = -0.5
# Create a threshold plot using the MIN and MAX range defined above
from matplotlib.ticker import FuncFormatter
def threshold_plot(da, min_threshold, max_threshold, mask = None, width = 10, *args, **kwargs):
color_in = np.array([255,0,0])
color_out = np.array([0,0,0])
color_cloud = np.array([255,255,255])
array = np.zeros((*da.values.shape, 3)).astype(np.int16)
inside = np.logical_and(da.values > min_threshold, da.values < max_threshold)
outside = np.invert(inside)
masked = np.zeros(da.values.shape).astype(bool) if mask is None else mask
array[inside] = color_in
array[outside] = color_out
array[masked] = color_cloud
def figure_ratio(ds, fixed_width = 10):
width = fixed_width
height = len(ds.latitude) * (fixed_width / len(ds.longitude))
return (width, height)
fig, ax = plt.subplots(figsize = figure_ratio(da,fixed_width = width))
lat_formatter = FuncFormatter(lambda y_val, tick_pos: "{0:.3f}".format(da.latitude.values[tick_pos] ))
lon_formatter = FuncFormatter(lambda x_val, tick_pos: "{0:.3f}".format(da.longitude.values[tick_pos]))
ax.xaxis.set_major_formatter(lon_formatter)
ax.yaxis.set_major_formatter(lat_formatter)
plt.title("Threshold: {} < x < {}".format(min_threshold, max_threshold))
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.imshow(array, *args, **kwargs)
plt.show()
no_data_mask = np.logical_or(np.isnan(baseline_composite.red.values), np.isnan(analysis_composite.red.values))
# Plot the parameter change using thresholds and 3 colors
# RED = data in the threshold range (significant land change)
# BLACK = data outside the threshold range (no significant change)
# WHITE = data outside the baseline mask (not dense vegetation) or clouds (no data)
threshold_plot(parameter_anomaly, minimum_change, maximum_change, mask = no_data_mask, width = 10)
def threshold_count(da, min_threshold, max_threshold, mask = None):
def count_not_nans(arr):
return np.count_nonzero(~np.isnan(arr))
in_threshold = np.logical_and( da.values > min_threshold, da.values < max_threshold)
total_non_cloudy = count_not_nans(da.values) if mask is None else np.sum(mask)
return dict(total = np.size(da.values),
total_non_cloudy = total_non_cloudy,
inside = np.nansum(in_threshold),
outside = total_non_cloudy - np.nansum(in_threshold)
)
def threshold_percentage(da, min_threshold, max_threshold, mask = None):
counts = threshold_count(da, min_threshold, max_threshold, mask = mask)
return dict(percent_inside_threshold = (counts["inside"] / counts["total"]) * 100.0,
percent_outside_threshold = (counts["outside"] / counts["total"]) * 100.0,
percent_clouds = ( 100.0-counts["total_non_cloudy"] / counts["total"] * 100.0))
# This output is a count of the pixels that fall within each threshold range
threshold_count(parameter_anomaly,minimum_change,maximum_change)
{'total': 552049, 'total_non_cloudy': 348765, 'inside': 102958, 'outside': 245807}
# This output is a percentage of the pixels that fall within each threshold range
threshold_percentage(parameter_anomaly,minimum_change,maximum_change)
{'percent_inside_threshold': 18.650156054987875, 'percent_outside_threshold': 44.52630110732924, 'percent_clouds': 36.823542837682886}
import rasterio
def write_geotiff_from_xr(tif_path, data, bands=None, no_data=-9999, crs="EPSG:4326",
x_coord='longitude', y_coord='latitude'):
"""Write a geotiff from an xarray dataset.
Parameters
----------
tif_path: string
The path to write the GeoTIFF file to. You should include the file extension.
x_coord, y_coord: string
The string names of the x and y dimensions.
data: xarray.Dataset or xarray.DataArray
bands: list of string
The bands to write - in the order they should be written.
Ignored if `data` is an `xarray.DataArray`.
no_data: int
The nodata value.
crs: string
The CRS of the output.
"""
if isinstance(data, xr.DataArray):
height, width = data.sizes[y_coord], data.sizes[x_coord]
count, dtype = 1, data.dtype
else:
if bands is None:
bands = list(data.data_vars.keys())
else:
assrt_msg_begin = "The `data` parameter is an `xarray.Dataset`. "
assert isinstance(bands, list), assrt_msg_begin + "Bands must be a list of strings."
assert len(bands) > 0 and isinstance(bands[0], str), assrt_msg_begin + "You must supply at least one band."
height, width = data.dims[y_coord], data.dims[x_coord]
count, dtype = len(bands), data[bands[0]].dtype
with rasterio.open(
tif_path,
'w',
driver='GTiff',
height=height,
width=width,
count=count,
dtype=dtype,
crs=crs,
transform=_get_transform_from_xr(data),
nodata=no_data) as dst:
if isinstance(data, xr.DataArray):
dst.write(data.values, 1)
else:
for index, band in enumerate(bands):
dst.write(data[band].values, index + 1)
dst.close()
def _get_transform_from_xr(data, x_coord='longitude', y_coord='latitude'):
"""Create a geotransform from an xarray.Dataset or xarray.DataArray.
"""
from rasterio.transform import from_bounds
geotransform = from_bounds(data[x_coord][0], data[y_coord][-1],
data[x_coord][-1], data[y_coord][0],
len(data[x_coord]), len(data[y_coord]))
return geotransform
# CHANGE HERE >>>>>>>>>>>>>>>>>>>>
# Remove the comment tag # to create a GeoTIFF output product
# Change the name of the output file, or it will be overwritten for each run
# There are 3 possible outputs ... baseline mosaic, analysis mosaic, or anomaly product
# write_geotiff_from_xr("geotiffs/sample_baseline_01.tif", bands = ['red','green','blue','nir','swir1','swir2'], baseline_composite)
# write_geotiff_from_xr("geotiffs/sample_analysis_01.tif", bands = ['red','green','blue','nir','swir1','swir2'], analysis_composite)
# write_geotiff_from_xr("geotiffs/sample_anomaly_01.tif", parameter_anomaly)
!ls -lah geotiffs
total 2.9M drwxrwsr-x 2 1000 users 4.0K Jan 14 04:40 . drwxrwsr-x 6 1000 users 4.0K Jan 14 11:21 .. -rw-rw-r-- 1 1000 users 1.7M Jan 14 00:16 DEMO_median_composite.tif -rw-rw-r-- 1 1000 users 1.2M Jan 14 04:40 WOFS_Percentage_demo.tif