#!/usr/bin/env python
# coding: utf-8

# # Harmonized Landsat Sentinel
# 
# Important docs and code examples:
# 
# https://lpdaac.usgs.gov/documents/1326/HLS_User_Guide_V2.pdf
# 
# https://www.sciencedirect.com/science/article/pii/S0034425718304139
# 
# https://lpdaac.usgs.gov/data/get-started-data/collection-overview/missions/harmonized-landsat-sentinel-2-hls-overview/
# 
# Needs updating, but helpful:
# https://lpdaac.usgs.gov/resources/e-learning/getting-started-cloud-native-hls-data-python/
# 
# And:
# https://nbviewer.org/github/microsoft/AIforEarthDataSets/blob/main/data/hls.ipynb

# In[1]:

import geopandas as gpd
import pandas as pd

# mamba install odc-stac odc-geo pyarrow=10 -c conda-forge
import odc
from odc.geo.geobox import GeoBox
import odc.stac

import pystac_client
#import planetary_computer

import holoviews as hv
import hvplot.pandas
import hvplot.xarray
import xarray as xr

import fsspec
import ast
import numpy as np
import pystac
import os

# In[2]:

stac_client = pystac_client.Client.open('https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD')

# In[3]:

get_ipython().run_cell_magic('time', '', '\nsearch = stac_client.search(\n    collections=[\'HLSS30.v2.0\', \'HLSL30.v2.0\'],\n    intersects=dict(type="Point", coordinates=[-105.78, 35.79]),  # Pecos Mtns NM\n    datetime=\'2022/2024\',\n)\n\nitems = search.get_all_items()\nprint(f\'Returned {len(items)} items\')\n')

# In[4]:

gf = gpd.GeoDataFrame.from_features(items.to_dict(), crs='EPSG:4326')

# In[5]:

# Instead of re-doing searches, can save and reload:
# items.save_object('results.json')
# items = pystac.ItemCollection.from_file('results.json')
# gf = gpd.read_file('results.json')

# In[6]:

items[0].self_href

# In[7]:

items[0].assets['B03'].href

# In[8]:

gf['stac_id'] = [item.id for item in items]
gf['time'] = pd.to_datetime(gf.datetime)  # Use standard xarray dimension name 'time' for index
gf.head()

# In[9]:

print(gf[gf.stac_id.str.startswith('HLS.L30')].shape)

# In[10]:

print(gf[gf.stac_id.str.startswith('HLS.S30')].shape)

# In[11]:

gf['platform'] = gf.stac_id.str[4]   # 'L' = Landsat, 'S' = Sentinel
gf['mgrs'] = gf.stac_id.str[9:14]

# In[12]:

gf.platform.value_counts()

# In[13]:

df = gf.set_index('time').tz_localize(None)  # Drop timezone info (always UTC)
# Drop geometry: the GeoPandasInterface otherwise raises errors such as
# "fill_color: expected an element of either String ... or Nullable(Color), got dim('cloud')"
df = df.loc[:, ['platform', 'eo:cloud_cover']]
df.hvplot.scatter(y='platform', c='eo:cloud_cover', marker='s', width=1000)

# In[14]:

iL = items[0]   # Landsat
iL

# In[15]:

iS = items[-1]  # Sentinel
iS

# In[16]:

iL.assets

# In[17]:

iS.assets

# In[18]:

iL.assets.keys()

# In[19]:

iS.assets.keys()

# In[20]:

iS.assets['B8A'].href

# In[21]:

MGRS_TILE = gf.mgrs.iloc[0]

url = 'https://github.com/scottyhq/mgrs/raw/main/MGRS_LAND.parquet'
with fsspec.open(url) as file:
    gfMGRS = gpd.read_parquet(file)

aoi = gfMGRS.query(' tile == @MGRS_TILE ')  # returns a GeoDataFrame
aoi

# In[22]:

s = aoi.iloc[0]  # GeoSeries
GEOMETRY = s.geometry.__geo_interface__
BOUNDS = ast.literal_eval(s.utm_bounds)
EPSG = str(s.epsg)

# In[23]:

GRID = GeoBox.from_bbox(BOUNDS,
                        crs=EPSG,
                        #resolution=10,
                        #resolution=20,
                        #resolution=100,
                        resolution=500,
                        #resolution=1000,
                        )
GRID

# In[24]:

# Need to separate b/c the band naming scheme differs between the two sensors
itemsL8 = [i for i in items if i.id.startswith('HLS.L')]
itemsS2 = [i for i in items if i.id.startswith('HLS.S')]
len(itemsL8)
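# A quick sanity check (a small addition, not in the original notebook): the two
# lists should partition the full search results, with no item left out.

# In[ ]:

assert len(itemsL8) + len(itemsS2) == len(items)
print(f'{len(itemsL8)} Landsat + {len(itemsS2)} Sentinel-2 = {len(items)} items')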
# In[25]:

# Map to common band names
# https://github.com/stac-extensions/eo#common-band-names
common_names = {
    'L8': {'B03': 'green', 'B04': 'red', 'B05': 'nir08', 'B06': 'swir16'},
    'S2': {'B03': 'green', 'B04': 'red', 'B8A': 'nir08', 'B11': 'swir16'},
}

# In[26]:

L = odc.stac.load(itemsL8,
                  chunks={'x': 256, 'y': 256},
                  geobox=GRID,
                  )
L

# In[27]:

S = odc.stac.load(itemsS2,
                  chunks={'x': 256, 'y': 256},  # If not set, pixels are actually read eagerly!
                  geobox=GRID,
                  )
S

# In[28]:

# Add 'L8' or 'S2' identifier as a coordinate
L = L.assign_coords(platform=('time', ['L8'] * L.time.size))
S = S.assign_coords(platform=('time', ['S2'] * S.time.size))

# In[29]:

# Apply the common-names mapping (e.g. 'B03' -> 'green')
S = S.rename_vars(common_names['S2'])
L = L.rename_vars(common_names['L8'])

# In[30]:

# Bands to keep track of
bands = ['green', 'red', 'nir08', 'swir16', 'Fmask']  # 'browse' excluded: browse images have no georeferencing

# In[31]:

#xr.merge([ L[bands], S[bands] ])  # not lazy, not sure why...
ds = xr.concat([L[bands], S[bands]], dim='time')  # Lazy! But no alignment...

# In[32]:

ds

# In[33]:

# NOTE: concat does not 'align' along the time dimension
np.all(np.diff(ds.time.values).astype(int) > 0)

# In[34]:

ds = ds.sortby('time')

# In[35]:

np.all(np.diff(ds.time.values).astype(int) > 0)

# In[36]:

# Add cloud-cover estimate from STAC metadata.
# Since a pandas time index is used behind the scenes, the values land at the matching timestamps
ds = ds.assign_coords(cloud_cover=df['eo:cloud_cover'])

# In[37]:

# Need ~/.netrc to access full-resolution NASA images, plus these GDAL settings
import os
os.environ['GDAL_HTTP_COOKIEJAR'] = '/tmp/cookie'
os.environ['GDAL_HTTP_COOKIEFILE'] = '/tmp/cookie'
os.environ['GDAL_DISABLE_READDIR_ON_OPEN'] = 'EMPTY_DIR'

# In[38]:

# Common indices (lazy)
ds['ndsi'] = (ds.green - ds.swir16) / (ds.green + ds.swir16)
ds['ndvi'] = (ds.nir08 - ds.red) / (ds.nir08 + ds.red)
ds['ndwi'] = (ds.nir08 - ds.swir16) / (ds.nir08 + ds.swir16)

# In[39]:

ds

# In[40]:

ds.green.isel(time=0).hvplot.image(data_aspect=1)

# In[41]:

ds.ndvi.isel(time=0).hvplot.image(data_aspect=1, title='NDVI', clim=(0, 1), cmap='greens')

# In[42]:

ds.ndwi.isel(time=0).hvplot.image(data_aspect=1, title='NDWI', clim=(0, 1), cmap='blues')

# In[43]:

ds.ndsi.isel(time=0).hvplot.image(data_aspect=1, title='NDSI', clim=(0, 1), cmap='magma')

# In[44]:

ds.ndvi.hvplot.image(x='x', y='y',
                     data_aspect=1,
                     rasterize=True,
                     cmap='greens',
                     clim=(0, 1),
                     )

# ## Masking
# 
# HLS has a bit-packed quality mask (Fmask), which is a little tricky to work with.
# The bits are defined LSB to MSB:
# 
# ```
# 0: cirrus
# 1: cloud
# 2: cloud/shadow adjacent
# 3: cloud shadow
# 4: snow/ice
# 5: water
# 6-7: aerosol level
#    - 11 = high aerosol (3)
#    - 10 = moderate (2)
#    - 01 = low (1)
#    - 00 = climatology aerosol (0)
# ```
# 
# But `np.unpackbits` returns bits MSB-first, so with Python indexing we'll reverse these:
# 
# ```
# 0-1: aerosol level
# 2: water
# 3: snow/ice
# 4: cloud shadow
# 5: cloud adjacent
# 6: cloud
# 7: cirrus
# ```
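# A quick illustration of that reversed ordering (a minimal sketch, not part of the
# original workflow; the value 34 is an arbitrary example):

# In[ ]:

# 0b00100010 = 34: water (LSB bit 5) and cloud (LSB bit 1) set
val = np.array([34], dtype='uint8')
bits = np.unpackbits(val)       # MSB-first: [0, 0, 1, 0, 0, 0, 1, 0]
print('water:', bool(bits[2]))  # reversed index 2 == LSB bit 5
print('cloud:', bool(bits[6]))  # reversed index 6 == LSB bit 1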
# In[45]:

def extract_masks(da):
    '''Given an HLS Fmask DataArray, return all mask arrays as an xarray Dataset'''
    da = da.astype('uint8')
    dsM = da.to_dataset()

    # Unpack each uint8 pixel into 8 bits (MSB-first) -> shape (y, x, 8)
    explode = (da.shape[0], da.shape[1], 8)
    masks = np.unpackbits(da, axis=1).reshape(explode)

    # Aerosol level is a two-bit value (reversed indices 0-1): weight the MSB by 2
    aerosols = masks[:, :, 0] * 2 + masks[:, :, 1]
    high_aerosol = (aerosols == 3)
    moderate_aerosol = (aerosols == 2)
    low_aerosol = (aerosols == 1)

    # Single-bit flags, remembering the reversed (MSB-first) ordering
    masks = masks.astype('bool')
    water = masks[:, :, 2]
    snow = masks[:, :, 3]
    cloud_shadow = masks[:, :, 4]
    cloud_adjacent = masks[:, :, 5]
    cloud = masks[:, :, 6]

    dsM['water'] = (('y', 'x'), water)
    dsM['snow'] = (('y', 'x'), snow)
    dsM['cloud_shadow'] = (('y', 'x'), cloud_shadow)
    dsM['cloud_adjacent'] = (('y', 'x'), cloud_adjacent)
    dsM['cloud'] = (('y', 'x'), cloud)
    dsM['high_aerosol'] = (('y', 'x'), high_aerosol)
    dsM['mod_aerosol'] = (('y', 'x'), moderate_aerosol)
    dsM['low_aerosol'] = (('y', 'x'), low_aerosol)

    dsM = dsM.drop_vars('Fmask')

    return dsM

dsM = extract_masks(ds.Fmask.isel(time=0))
dsM

# In[46]:

daM = dsM.to_array(dim='mask')
#daM.isel(mask=0).plot() # works
daM.plot(col='mask');

# ## Extract NDVI Timeseries

# In[47]:

# Point of interest in the grid's UTM coordinates (meters)
x = 4.28e5
y = 3.843e6

# In[48]:

get_ipython().run_cell_magic('time', '', "\n# NOTE: investigate speeding up with direct cloud access\n# https://nasa-openscapes.github.io/2021-Cloud-Hackathon/tutorials/02_Data_Discovery_CMR-STAC_API.html#write-links-to-file-for-s3-access\n\nts = ds.ndvi.sel(x=x, y=y, method='nearest').compute()\nts\n")

# In[49]:

ts.plot.line('k.');
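# Since `cloud_cover` rides along as a coordinate on `ts`, one quick way to clean
# the series is to keep only low-cloud scenes. A minimal sketch (the 20% threshold
# is an arbitrary choice, not from the original workflow):

# In[ ]:

ts_clear = ts.where(ts.cloud_cover < 20, drop=True)  # drop timestamps with >= 20% cloud cover
ts_clear.plot.line('k.');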