#!/usr/bin/env python # coding: utf-8 # ## Accessing JRC Global Surface Water data with the Planetary Computer STAC API # # This dataset contains data that show different facets of the spatial and temporal distribution of surface water over the last 32 years. In this notebook, we'll demonstrate how to access and work with this data through the Planetary Computer. # ### Environment setup # # In[1]: import pystac_client from matplotlib.colors import ListedColormap import rasterio import rasterio.mask import numpy as np import pystac import planetary_computer # ### Data access # # The datasets hosted by the Planetary Computer are available from [Azure Blob Storage](https://docs.microsoft.com/en-us/azure/storage/blobs/). We'll use [pystac-client](https://pystac-client.readthedocs.io/) to search the Planetary Computer's [STAC API](https://planetarycomputer.microsoft.com/api/stac/v1/docs) for the subset of the data that we care about, and then we'll load the data directly from Azure Blob Storage. We'll specify a `modifier` so that we can access the data stored in the Planetary Computer's private Blob Storage Containers. See [Reading from the STAC API](https://planetarycomputer.microsoft.com/docs/quickstarts/reading-stac/) and [Using tokens for data access](https://planetarycomputer.microsoft.com/docs/concepts/sas/) for more. # In[2]: catalog = pystac_client.Client.open( "https://planetarycomputer.microsoft.com/api/stac/v1", modifier=planetary_computer.sign_inplace, ) # ### Query the dataset # # JRC Global Surface Water data on the Planetary Computer is available globally. We'll pick an area with seasonal water in Bangladesh and use the STAC API to find what data items are available. # In[3]: area_of_interest = { "type": "Polygon", "coordinates": [ [ [91.07803344726562, 24.082828048468436], [91.20162963867188, 24.20563707758362], [91.43783569335938, 24.463400705082282], [91.50238037109375, 24.71315296906617], [90.9393310546875, 24.835349273134295], [90.81161499023438, 24.648265332632818], [90.89813232421875, 24.337086982410497], [90.90774536132812, 24.11792837933617], [91.07803344726562, 24.082828048468436], ] ], } # We can now execute a STAC API query for our selected area: # In[4]: jrc = catalog.search(collections=["jrc-gsw"], intersects=area_of_interest) items = jrc.item_collection() print(f"Returned {len(items)} Items") # A single item is returned, so we can work with single assets. If we chose an area with multiple tiles intersecting, we could save use something like [stacstack](https://github.com/gjoseph92/stackstac) or [gdal.BuildVRT](https://gdal.org/python/osgeo.gdal-module.html#BuildVRT) to work with multiple items and assets as a single layer. # In[5]: item = items[0] # Let's capture the color maps that are included in the COG assets by reading the metadata of the GeoTIFFs using rasterio. This also converts the color maps into a format matplotlib understands: # In[6]: cog_assets = [ asset_key for asset_key, asset in item.assets.items() if asset.media_type == pystac.MediaType.COG ] # In[7]: cmaps = {} for asset_key in cog_assets: asset = item.assets[asset_key] with rasterio.open(item.assets[asset_key].href) as src: colormap_def = src.colormap(1) # get metadata colormap for band 1 colormap = [ np.array(colormap_def[i]) / 256 for i in range(256) ] # transform to matplotlib color format cmaps[asset_key] = ListedColormap(colormap) # We can now render the six assets of the JRC Global Surface Water dataset over our area: # In[8]: import matplotlib.pyplot as plt from matplotlib.colors import Normalize dpi = 200 fig = plt.figure(frameon=False, dpi=dpi) for i, asset_key in enumerate(cog_assets): with rasterio.open(item.assets[asset_key].href) as src: asset_data, _ = rasterio.mask.mask( src, [area_of_interest], crop=True, nodata=255 ) ax1 = fig.add_subplot(int(f"23{i+1}")) ax1.set_title(asset_key, fontdict={"fontsize": 5}) ax1.set_axis_off() plt.imshow(asset_data[0], norm=Normalize(0, 255), cmap=cmaps[asset_key])