#!/usr/bin/env python # coding: utf-8 # # Visualizing change in reservoir surface water extent using the OPERA DSWx product with a time slider # --- # # **This notebook serves as a visualization tool using DSWx B01_WTR to illustrate change of Lake Mead, NV during 2022. Note: This notebook uses provisional products, which may differ slightly from operational products. Please refer to [DSWx product specification](https://d2pn8kiwq2w21t.cloudfront.net/documents/ProductSpec_DSWX_URS309746.pdf) for more information. ** # In[1]: # notebook dependencies import os import datetime from datetime import datetime import math import numpy as np import xarray as xr import rasterio as rio import rioxarray as rioxr import pyproj from pyproj import Proj import pandas as pd import geopandas as gpd import holoviews as hv import panel.widgets as pnw import hvplot.xarray import geoviews as gv from matplotlib.colors import ListedColormap from bokeh.models import FixedTicker hv.extension('bokeh') import sys sys.path.append("../..") from src.xml_util import get_cc import warnings warnings.filterwarnings('ignore') os.environ['AWS_NO_SIGN_REQUEST'] = 'YES' # --- # ## **Data Information Input** # In the code cell below, the user should specify the: # * Dates of interest
# * Data directory
# * Band of interest
# * Path to shapefile to create mask

# # **Note: The cell below is the only code in the notebook that should be modified. **

# In[2]:

# Name of S3 bucket that hosts provisional products
bucket = 'opera-pst-rs-pop1'

# Access all 2022 provisional products from S3 bucket.
# Filter keeps manifest entries whose acquisition-date field (5th-from-last
# underscore token) starts with '2022'.
data_dir = [match for match in sorted(open("aux_files/T11SQA_manifest.txt","r").read().split("\n")) if '2022' == match.split("_")[-5][:4]]

# Each manifest entry is a directory prefix; append the granule filename stem
# (a fixed character span of the path) to form the full object key.
# NOTE(review): the magic offsets (91, 32:101, 32:102) are tied to the exact
# bucket/key layout of this manifest — confirm against the manifest file
# before reusing with other tiles.
for i,path in enumerate(data_dir):
    if path != '': # discard empty entries
        if path[91] == 'L': # Landsat 8 filenames
            data_dir[i] = path+path[32:101]
        if path[91] == 'S': # Sentinel-2 filenames
            data_dir[i] = path+path[32:102] # Landsat filenames are 1 character shorter than Sentinel-2 filenames

# Extract date information (YYYYMMDD) from filename
dates = [path[57:65] for path in data_dir]

# Change this to the desired band for visualization
band = 'B01_WTR'

# Path to shapefile used to create mask of pixels close to Lake Mead
shapepath = 'aux_files/bufferlakebnds/bufferlakebnds.shp'

# ***
# ### ** -- Do not modify any of the code below -- **

# In[3]:

# Functions
def file_dataframe(data_dir:list, datelist:list):
    '''
    Returns dataframe with data_dir to DSWx layers for a given list of dates.

            Parameters:
                    data_dir (list): Working directory for each date
                    datelist (list): List of dates

            Returns:
                    dswx_file_df (Pandas DataFrame): List of data_dir layer for each date
    '''
    fileDF = []
    # FIX: iterate over the `datelist` parameter. The original body iterated
    # the module-level global `dates`, silently ignoring the argument; the two
    # happened to coincide at the single call site, so behavior is unchanged
    # for existing callers.
    for i,date in enumerate(datelist):
        # %-style templates filled with each layer's suffix below
        xmlpath = f"{data_dir[i][23:]}%s.xml"
        tifpaths = f"{data_dir[i]}%s.tif"
        XML = xmlpath%'.iso'
        B01_WTR = tifpaths%'_B01_WTR'
        B02_BWTR = tifpaths%'_B02_BWTR'
        B03_CONF = tifpaths%'_B03_CONF'
        B04_DIAG = tifpaths%'_B04_DIAG'
        B05_WTR_1 = tifpaths%'_B05_WTR-1'
        B06_WTR_2 = tifpaths%'_B06_WTR-2'
        B07_LAND = tifpaths%'_B07_LAND'
        B08_SHAD = tifpaths%'_B08_SHAD'
        B09_CLOUD = tifpaths%'_B09_CLOUD'
        # NOTE(review): this expands to "...DEM.tif.tif" (doubled extension),
        # unlike the '_Bxx_*' layers above — verify against the actual DSWx
        # B10 filename convention before relying on this column.
        B10_DEM = tifpaths%'DEM.tif'
        fileDF.append([date,XML,B01_WTR,B02_BWTR,B03_CONF,B04_DIAG,B05_WTR_1,B06_WTR_2,B07_LAND,B08_SHAD,B09_CLOUD,B10_DEM])
    fileDF = pd.DataFrame(fileDF,columns = ['Date', 'XML', 'B01_WTR', 'B02_BWTR', 'B03_CONF', 'B04_DIAG', 'B05_WTR_1', \
        'B06_WTR_2', 'B07_LAND', 'B08_SHAD', 'B09_CLOUD', 'B10_DEM']).astype('string')
    return fileDF


def stack_layers(files, datelist:list):
    '''
    Returns geocube with one band over time stacked into one multi-dimensional array.

            Parameters:
                    files: Path to the chosen band's GeoTIFF for each date
                    datelist (list): List of dates (YYYYMMDD strings)

            Returns:
                    layerStack (xarray Dataset): Geocube with band stacked in time
                    crs (int): EPSG code of the band's Coordinate Reference System

    NOTE(review): reads the module-level globals `dswx_file_df` (for cloud
    cover lookup) and `cclimit` (cloud-cover threshold); both must exist
    before this is called. The first date is always included regardless of
    its cloud cover — presumably intentional to seed the stack; confirm.
    '''
    layerStack = []; layerS = []; layerStack_ = [];
    for i,d in enumerate(datelist):
        time = datetime.strptime(d,'%Y%m%d')
        if i == 0:
            # NOTE(review): xr.open_rasterio is deprecated (removed in newer
            # xarray); rioxr.open_rasterio is the replacement, but its return
            # exposes the CRS differently (.rio.crs), so it is not swapped
            # here without testing.
            layerStack_ = xr.open_rasterio(files[i])
            crs = pyproj.CRS.to_epsg(pyproj.CRS.from_proj4(layerStack_.crs))
            layerStack = layerStack_.squeeze(drop=True)
            layerStack = layerStack.to_dataset(name='z')
            layerStack.coords['time'] = np.array(time)
            layerStack = layerStack.rename({'x':'longitude', 'y':'latitude'})
            layerStack = layerStack.expand_dims(dim='time')
        else:
            # only append dates under the percent-cloud-cover threshold
            cc = get_cc(dswx_file_df.XML[i])
            if cc < cclimit:
                layerS = xr.open_rasterio(files[i])
                layerS = layerS.squeeze(drop=True)
                layerS = layerS.to_dataset(name='z')
                layerS.coords['time'] = np.array(time)
                layerS = layerS.rename({'x':'longitude', 'y':'latitude'})
                layerS = layerS.expand_dims(dim='time')
                layerStack = xr.concat([layerStack,layerS], dim='time')
    return layerStack, crs


def buffer_mask(shapefile:str):
    '''
    Returns masked data based on buffered shapefile.

            Parameters:
                    shapefile (str): Path to buffered shapefile

            Returns:
                    masked (xarray Dataset): Geocube of masked data

    NOTE(review): reads the module-level global `data` (the geocube built by
    stack_layers) — it must exist before this is called. 255 is treated as
    the nodata/fill value, and time steps with >= 4e6 NaN pixels inside the
    clip are dropped (the first time step is kept unconditionally).
    '''
    shp = gpd.read_file(shapefile)
    # clip to the buffered lake boundary, then mask out fill pixels (255)
    mask = data.rio.clip(shp.geometry)
    mask = mask.where(mask['z'] != 255.)
    masked = []; mask_ = []; masked_ = [];
    for i in range(len(mask.z)):
        if i == 0:
            masked_ = mask.z[i]
            masked = masked_.squeeze(drop=True)
            masked = masked.to_dataset(name='z')
            masked.coords['time'] = mask.time[i]
            masked = masked.expand_dims(dim='time')
        else:
            mask_ = mask.z[i]
            # skip dates with too many invalid pixels over/around the lake
            if np.count_nonzero(np.isnan(mask_))<4e6:
                mask_ = mask_.squeeze(drop=True)
                mask_ = mask_.to_dataset(name='z')
                mask_.coords['time'] = mask.time[i]
                mask_ = mask_.expand_dims(dim='time')
                masked = xr.concat([masked,mask_], dim='time')
    return masked

# ### ** -- Do not modify any of the code above -- **

# ---
# ## **1.
# Prepare the Geocube: Create the file dataframe, multidimensional dataset, and mask**

# In[4]:

# Create dataframe of paths to bands for each date
dswx_file_df = file_dataframe(data_dir,dates)

# Inspect the dataframe (notebook cell display)
dswx_file_df

# We will create a geocube of B01_WTR over time, only including dates with
# 10% cloud cover or less. This will take several minutes to run due to large
# data volume. You can shorten the length of time by adjusting the `data_dir`
# variable.

# In[5]:

# Percent cloud cover threshold.
# NOTE(review): `cclimit` and `dswx_file_df` are read as globals inside
# stack_layers, and `data` is read as a global inside buffer_mask — these
# names must not be renamed and must be assigned before the calls below.
cclimit = 10

# Stack dates to create a multidimensional "geocube" for chosen band
data, crs = stack_layers(dswx_file_df[band], dates)

# Inspect the geocube dataset created (notebook cell display)
data

# We only include pixels near Lake Mead by creating mask from buffered
# shapefile based on a [2003 USGS shapefile](https://pubs.usgs.gov/of/2009/1150/gis/basemap/lakebndsmeta.htm).
# This shapefile is created using the optional [prep_shapefile](link)
# notebook provided in this repository. Additionally, we only include dates
# with enough valid pixels over and surrounding Lake Mead.

# In[6]:

# Create a masked geocube
masked_data = buffer_mask(shapepath)

# Inspect the masked dataset (notebook cell display)
masked_data

# ---
# ## **2.
# Visualize surface water extent of Lake Mead during 2022**

# In[7]:

# Esri satellite imagery as the underlying basemap
basemap = gv.tile_sources.EsriImagery.opts(width=1000, height=1000, padding=0.1)

# DSWx B01_WTR class names paired with their display colors
class_colors = {
    "Not Water": "#ffffff",
    "Open Water": "#0000ff",
    "Partial Surface Water": "#00ff00",
    "Reserved": "#000000",
    "Snow/Ice": "#00ffff",
    "Clouds/Cloud Shadow": "#7f7f7f",
}

# Pixel-value boundaries separating the classes in the colormap
class_levels = [0, 0.9, 1.9, 2.9, 7.9, 8.9, 10]

# Colorbar tick positions (one inside each class interval), each labeled
# with the corresponding class name
tick_positions = [0.5, 1.5, 2.5, 5.5, 8.5, 9.5]
tick_labels = dict(zip(tick_positions, class_colors))
colorbar_ticker = FixedTicker(ticks=tick_positions)

# Hide "Not Water" (value 0) pixels so only water/ice/cloud classes render
water_only = masked_data.z.where(masked_data.z > 0)

# Interactive rasterized image with a discrete time slider over the stack
slider_view = (
    water_only.interactive.sel(time=pnw.DiscreteSlider)
    .hvplot(
        x='longitude',
        y='latitude',
        crs=crs,
        kind='image',
        rasterize=True,
        dynamic=True,
        aspect='equal',
        frame_width=600,
        frame_height=600,
        clim=(0, 10),
        alpha=0.8,
    )
    .opts(
        active_tools=['wheel_zoom'],
        xlabel='Longitude',
        ylabel='Latitude',
        color_levels=class_levels,
        cmap=tuple(class_colors.values()),
        colorbar_opts={'ticker': colorbar_ticker, 'major_label_overrides': tick_labels},
    )
)

# Overlay the classified water layer on the basemap (notebook cell display)
slider_view * basemap

# In[ ]: