#!/usr/bin/env python # coding: utf-8 # # Land Change # # This notebook uses changes in NDVI, EVI or Fractional Cover to identify land change. The algorithm identifies a "baseline" and "analysis" time period and then compares the spectral parameters in each of those time periods. Significant reductions in vegetation are coincident with land change. In some cases these changes could be deforestation. Users of this algorithm should not accept the accuracy of the results but should conduct ground validation testing to assess accuracy. In most cases, these algorithms can be used to identify clusters of pixels that have experienced change and allow targeted investigation of those areas by local or regional governments. # In[1]: import warnings # Supress Warning warnings.filterwarnings('ignore') # Load Data Cube Configuration import datacube dc = datacube.Datacube(app = 'my_app')#, config = '/home/localuser/.datacube.conf') # Import Data Cube API import utils.data_cube_utilities.data_access_api as dc_api api = dc_api.DataAccessApi()#config = '/home/localuser/.datacube.conf') dc = api.dc # In[2]: ## LS8 Caqueta # Latitude: (0.000134747292617865, 1.077843593651382) # Longitude: (-74.91935994831539, -73.30266193148462) # '2013-04-13', '2018-03-26' # Resolution: (-0.000269494585236, 0.000269494585236) ## LS8 Vietnam # Latitude: (10.513927001104687, 12.611133863411238) # Longitude: (106.79005909290998, 108.91906631627438) # '2014-01-14', '2016-12-21' # Resolution: (-0.000269494585236, 0.000269494585236) ## LS7 Caqueta # Latitude: (0.000134747292617865, 1.077843593651382) # Longitude: (-74.91935994831539, -73.30266193148462) # '1999-08-21', '2018-03-25' # Resolution: (-0.000269494585236, 0.000269494585236) ## LS7 Lake Baringo # Latitude: (0.4997747685, 0.7495947795) # Longitude: (35.9742163305, 36.473586859499996) # '2005-01-08', '2016-12-24' # Resolution: (-0.000269493, 0.000269493) # In[3]: # CHANGE HERE >>>>>>>>>>>>>>>>> # Select a Product and Platform #product = ls7_collection1_AMA_ingest #platform = "LANDSAT_7" product = 'ls7_collection1_AMA_ingest' platform = "LANDSAT_7" output_crs = 'EPSG:4326' resolution = (-0.000269494585236, 0.000269494585236) # In[4]: # CHANGE HERE >>>>>>>>>>>>>>>>> # Select an analysis region (Lat-Lon) within the extents listed above. # HINT: Keep your region small (<0.5 deg square) to avoid memory overload issues # Select a time period (Min-Max) within the extents listed above (Year-Month-Day) # This region and time period will be used for the cloud assessment #Sub-region selection latitude = (1.0684, 0.8684) longitude = (-74.8409, -74.6409) # In[5]: ## The code below renders a map that can be used to orient yourself with the region. from utils.data_cube_utilities.dc_display_map import display_map display_map(latitude = latitude, longitude = longitude) # # Define Analysis Parameters # In[6]: from datetime import datetime # In[7]: # CHANGE HERE >>>>>>>>>>>>>>>>>>>> # Select the start and end periods for your analysis products # The datetime function is (Year,Month,Day) # These time windows will be used to make a mosaic, so typically pick a year length or more # Be sure to evaluate the RGB mosaics to affirm they are not full of clouds # Select the baseline time period (start and end) baseline_time_period = (datetime(2000,1,1), datetime(2001,1,1)) # Select the analysis time period (start and end) analysis_time_period = (datetime(2016,1,1), datetime(2017,6,1)) # In[8]: # CHANGE HERE >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> # Select the cloud-free mosaic type # Options are: max_ndvi, min_ndvi, median, most_recent_pixel, geomedian # If a geomedian is selected, it will take much longer to process # It is most common to use the max_ndvi or median for these land change analyses # HINT: Consider max_ndvi mosaics for NDVI analyses and median mosaics for EVI analyses baseline_mosaic_function = "median" analysis_mosaic_function = "median" # # Load and Clean Data # >#### Load Data ( Baseline, Analysis) # In[9]: import datacube name = "land change" version = 1 dc = datacube.Datacube(app = "{}_v{}".format(name, version))#, #config = '/home/localuser/.datacube.conf') # In[10]: baseline_ds = dc.load( latitude = latitude, longitude = longitude, time = baseline_time_period, measurements = ["red", "green", "blue", "nir", "swir1", "swir2", "pixel_qa"], product = product, platform = platform, output_crs = output_crs, resolution = resolution ) # In[11]: analysis_ds = dc.load( latitude = latitude, longitude = longitude, time = analysis_time_period, measurements = ["red", "green", "blue", "nir", "swir1", "swir2", "pixel_qa"], product = product, platform = platform, output_crs = output_crs, resolution = resolution ) # > #### Check if loads are valid # In[12]: import xarray as xr def is_dataset_empty(ds:xr.Dataset) -> bool: checks_for_empty = [ lambda x: len(x.dims) == 0, #Dataset has no dimensions lambda x: len(x.data_vars) == 0 #Dataset no variables ] for f in checks_for_empty: if f(ds) == True: return True return False # In[13]: if is_dataset_empty(baseline_ds): raise Exception("DataCube Load returned an empty Dataset." + "Please check load parameters for Baseline Dataset!") # In[14]: if is_dataset_empty(analysis_ds): raise Exception("DataCube Load returned an empty Dataset." + "Please check load parameters for Analysis Dataset!") # > #### Clean Data # > Generating boolean masks that highlight valid pixels # > Pixels must be cloud-free over land or water to be considered # In[15]: from utils.data_cube_utilities.dc_mosaic import ls8_unpack_qa, ls7_unpack_qa unpack_function = {"LANDSAT_7": ls7_unpack_qa, "LANDSAT_8": ls8_unpack_qa} unpack = unpack_function[platform] # In[16]: import numpy as np def clean_mask(ds, unpacking_func, bands): masks = [unpacking_func(ds, band) for band in bands] return np.logical_or(*masks).values # In[17]: baseline_clean_mask = clean_mask(baseline_ds.pixel_qa,unpack, ["clear", "water"]) analysis_clean_mask = clean_mask(analysis_ds.pixel_qa, unpack, ["clear", "water"]) # In[18]: baseline_ds = baseline_ds.where(baseline_clean_mask) analysis_ds = analysis_ds.where(analysis_clean_mask) # >#### Mosaic # > Use clean masks in a time series composite # In[19]: from utils.data_cube_utilities.dc_mosaic import create_max_ndvi_mosaic, create_min_ndvi_mosaic, create_median_mosaic, create_mosaic, create_hdmedians_multiple_band_mosaic # In[20]: mosaic_function = {"median": create_median_mosaic, "max_ndvi": create_max_ndvi_mosaic, "min_ndvi": create_min_ndvi_mosaic, "geomedian": create_hdmedians_multiple_band_mosaic, "most_recent_pixel": create_mosaic} # In[21]: baseline_compositor = mosaic_function[baseline_mosaic_function] analysis_compositor = mosaic_function[analysis_mosaic_function] # In[22]: baseline_composite = baseline_compositor(baseline_ds, clean_mask = baseline_clean_mask) analysis_composite = analysis_compositor(analysis_ds, clean_mask = analysis_clean_mask) # In[23]: from utils.data_cube_utilities.dc_rgb import rgb # In[24]: rgb(baseline_composite, use_data_min=True, use_data_max=True) # In[25]: rgb(analysis_composite, use_data_min=True, use_data_max=True) # >## Plot a spectral index using the cloud-filtered mosaic # In[26]: def NDVI(dataset): return (dataset.nir - dataset.red)/(dataset.nir + dataset.red) # In[27]: def EVI(dataset): return 2.5*(dataset.nir - dataset.red)/(dataset.nir + 6.0*dataset.red - 7.5*dataset.blue + 1.0) # In[28]: ndvi = NDVI(baseline_composite) evi = EVI(baseline_composite) # In[29]: from utils.data_cube_utilities.dc_fractional_coverage_classifier import frac_coverage_classify frac_classes = frac_coverage_classify(baseline_composite, clean_mask = np.ones(baseline_composite.pixel_qa.shape).astype(np.bool)) # In[30]: # CHANGE HERE >>>>>>>>>>>>>>>>>> # Select a spectral index to plot by removing one of the comment tags (#) below # Adjust the scale of the output using vmin and vmax # (ndvi).plot(figsize=(10,10),cmap = "Greens", vmin=0.3, vmax=1.0) (evi).plot(figsize=(10,10),cmap = "Greens", vmin=1.75, vmax=2.5) # (frac_classes.pv).plot(figsize=(10,10),cmap = "Greens", vmin=70.0, vmax=100.0) # In[31]: # CHANGE HERE >>>>>>>>>>>>>>>>>>>> # Select a baseline threshold range for a given parameter ... which is basically a "mask" # The analysis will only consider pixels in this range for change detection # No data or cloud pixels will also be masked in this process # If you want to see all change, you should select the full range of values for each parameter # NDVI full range = 0.0 to 1.0 # EVI full range = 0.0 to 2.5 # Fractional Cover full range = 0.0 to 100.0 # If you want to mask out dense vegetation, typical of forested land, use the values below # NDVI: 0.7 to 1.0 # EVI: 2.0 to 2.5 # Fractional Cover PV: 70.0 to 100.0 baseline_threshold_range = (2.00, 2.50) # In[32]: _min, _max = baseline_threshold_range # In[33]: # CHANGE HERE >>>>>>>>>>>>> # Select the parameter to be used for thresholding ... NDVI, EVI or FC # Remove the comment statement to use the desired parameter # baseline_filter_mask = np.logical_and(NDVI(baseline_composite) > _min, NDVI(baseline_composite) < _max) baseline_filter_mask = np.logical_and(EVI(baseline_composite) > _min, EVI(baseline_composite) < _max) # baseline_filter_mask = np.logical_and(frac_classes.pv > _min, frac_classes.pv < _max) # In[34]: def aspect_ratio_helper(ds, fixed_width = 12): y,x = ds.values.shape width = fixed_width height = y * (fixed_width / x) return (width, height) # In[35]: import matplotlib.pyplot as plt from matplotlib.cm import RdYlGn, Greens RdYlGn.set_bad('black',1.) Greens.set_bad('black',1.) # In[36]: # This is the baseline threshold plot that shows GREEN pixels in the threshold range # Pixels with values outside the threshold range are white plt.figure(figsize = aspect_ratio_helper(baseline_filter_mask)) baseline_filter_mask.plot(cmap = "Greens") # In[37]: baseline_composite = baseline_composite.where(baseline_filter_mask) # # Spectral Parameter Anomaly # In[38]: from utils.data_cube_utilities.dc_fractional_coverage_classifier import frac_coverage_classify # In[39]: # CHANGE HERE >>>>>>>>>>>>> # Select the parameter to be used for the analysis # You will select two lines for each parameter (baseline and analysis) # Select these two lines for NDVI # parameter_baseline_composite = NDVI(baseline_composite) # parameter_analysis_composite = NDVI(analysis_composite) # Select these two lines for EVI parameter_baseline_composite = EVI(baseline_composite) parameter_analysis_composite = EVI(analysis_composite) # Select these two lines for Fractional Cover (FC) # parameter_baseline_composite = frac_coverage_classify(baseline_composite, clean_mask = np.ones(baseline_composite.pixel_qa.shape).astype(np.bool)) # parameter_analysis_composite = frac_coverage_classify(analysis_composite, clean_mask = np.ones(analysis_composite.pixel_qa.shape).astype(np.bool)) # In[40]: # CHANGE HERE >>>>>>>>>>>>> # Select this line for NDVI or EVI parameter_anomaly = parameter_analysis_composite - parameter_baseline_composite # Select this line for Fractional Cover (FC), where PV = Photosynthetic Vegetation # parameter_anomaly = parameter_analysis_composite.pv - parameter_baseline_composite.pv # In[41]: # Plot the parameter change from the baseline to the analysis period # Significant loss in vegetation will be shown in RED # Gains in vegetation will be shown in GREEN plt.figure(figsize = aspect_ratio_helper(parameter_anomaly)) parameter_anomaly.plot(cmap = RdYlGn) # # Anomaly Threshold Product # In[42]: # CHANGE HERE >>>>>>>>>>>>>>>>>>>> # Select an Anomaly Threshold Range to filter the results from the previous image # Be sure to put the smallest value in the "minimum_change" location (be careful of negative values) # Losses are typically in these ranges for deforestation # NDVI: -0.2 to -0.7 # EVI: -0.5 to -1.75 # Fractional Cover PV: -20 to -70 minimum_change = -1.75 maximum_change = -0.5 # In[43]: # Create a threshold plot using the MIN and MAX range defined above from matplotlib.ticker import FuncFormatter def threshold_plot(da, min_threshold, max_threshold, mask = None, width = 10, *args, **kwargs): color_in = np.array([255,0,0]) color_out = np.array([0,0,0]) color_cloud = np.array([255,255,255]) array = np.zeros((*da.values.shape, 3)).astype(np.int16) inside = np.logical_and(da.values > min_threshold, da.values < max_threshold) outside = np.invert(inside) masked = np.zeros(da.values.shape).astype(bool) if mask is None else mask array[inside] = color_in array[outside] = color_out array[masked] = color_cloud def figure_ratio(ds, fixed_width = 10): width = fixed_width height = len(ds.latitude) * (fixed_width / len(ds.longitude)) return (width, height) fig, ax = plt.subplots(figsize = figure_ratio(da,fixed_width = width)) lat_formatter = FuncFormatter(lambda y_val, tick_pos: "{0:.3f}".format(da.latitude.values[tick_pos] )) lon_formatter = FuncFormatter(lambda x_val, tick_pos: "{0:.3f}".format(da.longitude.values[tick_pos])) ax.xaxis.set_major_formatter(lon_formatter) ax.yaxis.set_major_formatter(lat_formatter) plt.title("Threshold: {} < x < {}".format(min_threshold, max_threshold)) plt.xlabel('Longitude') plt.ylabel('Latitude') plt.imshow(array, *args, **kwargs) plt.show() # In[44]: no_data_mask = np.logical_or(np.isnan(baseline_composite.red.values), np.isnan(analysis_composite.red.values)) # In[45]: # Plot the parameter change using thresholds and 3 colors # RED = data in the threshold range (significant land change) # BLACK = data outside the threshold range (no significant change) # WHITE = data outside the baseline mask (not dense vegetation) or clouds (no data) threshold_plot(parameter_anomaly, minimum_change, maximum_change, mask = no_data_mask, width = 10) # In[46]: def threshold_count(da, min_threshold, max_threshold, mask = None): def count_not_nans(arr): return np.count_nonzero(~np.isnan(arr)) in_threshold = np.logical_and( da.values > min_threshold, da.values < max_threshold) total_non_cloudy = count_not_nans(da.values) if mask is None else np.sum(mask) return dict(total = np.size(da.values), total_non_cloudy = total_non_cloudy, inside = np.nansum(in_threshold), outside = total_non_cloudy - np.nansum(in_threshold) ) def threshold_percentage(da, min_threshold, max_threshold, mask = None): counts = threshold_count(da, min_threshold, max_threshold, mask = mask) return dict(percent_inside_threshold = (counts["inside"] / counts["total"]) * 100.0, percent_outside_threshold = (counts["outside"] / counts["total"]) * 100.0, percent_clouds = ( 100.0-counts["total_non_cloudy"] / counts["total"] * 100.0)) # In[47]: # This output is a count of the pixels that fall within each threshold range threshold_count(parameter_anomaly,minimum_change,maximum_change) # In[48]: # This output is a percentage of the pixels that fall within each threshold range threshold_percentage(parameter_anomaly,minimum_change,maximum_change) # ## GeoTIFF Output Products # In[49]: import rasterio def write_geotiff_from_xr(tif_path, data, bands=None, no_data=-9999, crs="EPSG:4326", x_coord='longitude', y_coord='latitude'): """Write a geotiff from an xarray dataset. Parameters ---------- tif_path: string The path to write the GeoTIFF file to. You should include the file extension. x_coord, y_coord: string The string names of the x and y dimensions. data: xarray.Dataset or xarray.DataArray bands: list of string The bands to write - in the order they should be written. Ignored if `data` is an `xarray.DataArray`. no_data: int The nodata value. crs: string The CRS of the output. """ if isinstance(data, xr.DataArray): height, width = data.sizes[y_coord], data.sizes[x_coord] count, dtype = 1, data.dtype else: if bands is None: bands = list(data.data_vars.keys()) else: assrt_msg_begin = "The `data` parameter is an `xarray.Dataset`. " assert isinstance(bands, list), assrt_msg_begin + "Bands must be a list of strings." assert len(bands) > 0 and isinstance(bands[0], str), assrt_msg_begin + "You must supply at least one band." height, width = data.dims[y_coord], data.dims[x_coord] count, dtype = len(bands), data[bands[0]].dtype with rasterio.open( tif_path, 'w', driver='GTiff', height=height, width=width, count=count, dtype=dtype, crs=crs, transform=_get_transform_from_xr(data), nodata=no_data) as dst: if isinstance(data, xr.DataArray): dst.write(data.values, 1) else: for index, band in enumerate(bands): dst.write(data[band].values, index + 1) dst.close() def _get_transform_from_xr(data, x_coord='longitude', y_coord='latitude'): """Create a geotransform from an xarray.Dataset or xarray.DataArray. """ from rasterio.transform import from_bounds geotransform = from_bounds(data[x_coord][0], data[y_coord][-1], data[x_coord][-1], data[y_coord][0], len(data[x_coord]), len(data[y_coord])) return geotransform # In[50]: # CHANGE HERE >>>>>>>>>>>>>>>>>>>> # Remove the comment tag # to create a GeoTIFF output product # Change the name of the output file, or it will be overwritten for each run # There are 3 possible outputs ... baseline mosaic, analysis mosaic, or anomaly product # write_geotiff_from_xr("geotiffs/sample_baseline_01.tif", bands = ['red','green','blue','nir','swir1','swir2'], baseline_composite) # write_geotiff_from_xr("geotiffs/sample_analysis_01.tif", bands = ['red','green','blue','nir','swir1','swir2'], analysis_composite) # write_geotiff_from_xr("geotiffs/sample_anomaly_01.tif", parameter_anomaly) # In[51]: get_ipython().system('ls -lah geotiffs') # In[ ]: