#!/usr/bin/env python
# coding: utf-8

# # Harmonized Landsat Sentinel
# 
# Important docs and code examples:
# 
# https://lpdaac.usgs.gov/documents/1326/HLS_User_Guide_V2.pdf
# 
# https://www.sciencedirect.com/science/article/pii/S0034425718304139
# 
# https://lpdaac.usgs.gov/data/get-started-data/collection-overview/missions/harmonized-landsat-sentinel-2-hls-overview/
# 
# Needs updating, but helpful:
# https://lpdaac.usgs.gov/resources/e-learning/getting-started-cloud-native-hls-data-python/
# 
# And:
# https://nbviewer.org/github/microsoft/AIforEarthDataSets/blob/main/data/hls.ipynb

# In[1]:

import geopandas as gpd
import pandas as pd

# mamba install odc-stac odc-geo pyarrow=10 -c conda-forge
import odc
from odc.geo.geobox import GeoBox
import odc.stac

import pystac_client
#import planetary_computer

import holoviews as hv
import hvplot.pandas
import hvplot.xarray
import xarray as xr

import fsspec
import ast
import numpy as np
import pystac
import os

# In[2]:

stac_client = pystac_client.Client.open('https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD')

# In[3]:

get_ipython().run_cell_magic('time', '', '\nsearch = stac_client.search(\n    collections=[\'HLSS30.v2.0\', \'HLSL30.v2.0\'],\n    intersects=dict(type="Point", coordinates=[-105.78, 35.79]),  # Pecos Mtns NM\n    datetime=\'2022/2024\',\n)\n\nitems = search.get_all_items()\nprint(f\'Returned {len(items)} items\')\n')

# In[4]:

gf = gpd.GeoDataFrame.from_features(items.to_dict(), crs='EPSG:4326')

# In[5]:

# Instead of re-doing searches, can save and reload:
# items.save_object('results.json')
# items = pystac.ItemCollection.from_file('results.json')
# gf = gpd.read_file('results.json')

# In[6]:

items[0].self_href

# In[7]:

items[0].assets['B03'].href

# In[8]:

gf['stac_id'] = [item.id for item in items]
gf['time'] = pd.to_datetime(gf.datetime)  # Use standard xarray dimension name 'time' for index
gf.head()

# In[9]:

print(gf[gf.stac_id.str.startswith('HLS.L30')].shape)

# In[10]:

print(gf[gf.stac_id.str.startswith('HLS.S30')].shape)

# In[11]:

gf['platform'] = gf.stac_id.str[4]   # 'L' = Landsat, 'S' = Sentinel
gf['mgrs'] = gf.stac_id.str[9:14]

# In[12]:

gf.platform.value_counts()

# In[13]:

df = gf.set_index('time').tz_localize(None)  # Drop timezone info (always UTC)
# Drop geometry: the GeoPandasInterface otherwise raises errors such as
# "fill_color: expected an element of either String ... or Nullable(Color), got dim('cloud')"
df = df.loc[:, ['platform', 'eo:cloud_cover']]
df.hvplot.scatter(y='platform', c='eo:cloud_cover', marker='s', width=1000)

# In[14]:

iL = items[0]   # Landsat
iL

# In[15]:

iS = items[-1]  # Sentinel
iS

# In[16]:

iL.assets

# In[17]:

iS.assets

# In[18]:

iL.assets.keys()

# In[19]:

iS.assets.keys()

# In[20]:

iS.assets['B8A'].href

# In[21]:

MGRS_TILE = gf.mgrs.iloc[0]

url = 'https://github.com/scottyhq/mgrs/raw/main/MGRS_LAND.parquet'
with fsspec.open(url) as file:
    gfMGRS = gpd.read_parquet(file)

aoi = gfMGRS.query(' tile == @MGRS_TILE ')  # returns a GeoDataFrame
aoi

# In[22]:

s = aoi.iloc[0]  # GeoSeries
GEOMETRY = s.geometry.__geo_interface__
BOUNDS = ast.literal_eval(s.utm_bounds)
EPSG = str(s.epsg)

# In[23]:

GRID = GeoBox.from_bbox(BOUNDS,
                        crs=EPSG,
                        #resolution=10,
                        #resolution=20,
                        #resolution=100,
                        resolution=500,
                        #resolution=1000,
                        )
GRID

# In[24]:

# Need to separate b/c the band naming scheme differs between the two sensors
itemsL8 = [i for i in items if i.id.startswith('HLS.L')]
itemsS2 = [i for i in items if i.id.startswith('HLS.S')]
len(itemsL8)
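# A quick sanity check (a small addition, not in the original notebook): the two
# lists should partition the full search results, with no item left out.

# In[ ]:

assert len(itemsL8) + len(itemsS2) == len(items)
print(f'{len(itemsL8)} Landsat + {len(itemsS2)} Sentinel-2 = {len(items)} items')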
# In[25]:

# Map to common band names
# https://github.com/stac-extensions/eo#common-band-names
common_names = {
    'L8': {'B03': 'green', 'B04': 'red', 'B05': 'nir08', 'B06': 'swir16'},
    'S2': {'B03': 'green', 'B04': 'red', 'B8A': 'nir08', 'B11': 'swir16'},
}

# In[26]:

L = odc.stac.load(itemsL8,
                  chunks={'x': 256, 'y': 256},
                  geobox=GRID,
                  )
L

# In[27]:

S = odc.stac.load(itemsS2,
                  chunks={'x': 256, 'y': 256},  # If not set, pixels are actually read eagerly!
                  geobox=GRID,
                  )
S

# In[28]:

# Add 'L8' or 'S2' identifier as a coordinate
L = L.assign_coords(platform=('time', ['L8'] * L.time.size))
S = S.assign_coords(platform=('time', ['S2'] * S.time.size))

# In[29]:

# Apply the common-names mapping (e.g. 'B03' -> 'green')
S = S.rename_vars(common_names['S2'])
L = L.rename_vars(common_names['L8'])

# In[30]:

# Bands to keep track of
bands = ['green', 'red', 'nir08', 'swir16', 'Fmask']  # 'browse' excluded: browse images have no georeferencing

# In[31]:

#xr.merge([ L[bands], S[bands] ])  # not lazy, not sure why...
ds = xr.concat([L[bands], S[bands]], dim='time')  # Lazy! But no alignment...

# In[32]:

ds

# In[33]:

# NOTE: concat does not 'align' along the time dimension
np.all(np.diff(ds.time.values).astype(int) > 0)

# In[34]:

ds = ds.sortby('time')

# In[35]:

np.all(np.diff(ds.time.values).astype(int) > 0)

# In[36]:

# Add cloud-cover estimate from STAC metadata.
# Since a pandas time index is used behind the scenes, the values land at the matching timestamps
ds = ds.assign_coords(cloud_cover=df['eo:cloud_cover'])

# In[37]:

# Need ~/.netrc to access full-resolution NASA images, plus these GDAL settings
import os
os.environ['GDAL_HTTP_COOKIEJAR'] = '/tmp/cookie'
os.environ['GDAL_HTTP_COOKIEFILE'] = '/tmp/cookie'
os.environ['GDAL_DISABLE_READDIR_ON_OPEN'] = 'EMPTY_DIR'

# In[38]:

# Common indices (lazy)
ds['ndsi'] = (ds.green - ds.swir16) / (ds.green + ds.swir16)
ds['ndvi'] = (ds.nir08 - ds.red) / (ds.nir08 + ds.red)
ds['ndwi'] = (ds.nir08 - ds.swir16) / (ds.nir08 + ds.swir16)

# In[39]:

ds

# In[40]:

ds.green.isel(time=0).hvplot.image(data_aspect=1)

# In[41]:

ds.ndvi.isel(time=0).hvplot.image(data_aspect=1, title='NDVI', clim=(0, 1), cmap='greens')

# In[42]:

ds.ndwi.isel(time=0).hvplot.image(data_aspect=1, title='NDWI', clim=(0, 1), cmap='blues')

# In[43]:

ds.ndsi.isel(time=0).hvplot.image(data_aspect=1, title='NDSI', clim=(0, 1), cmap='magma')

# In[44]:

ds.ndvi.hvplot.image(x='x', y='y',
                     data_aspect=1,
                     rasterize=True,
                     cmap='greens',
                     clim=(0, 1),
                     )

# ## Masking
# 
# HLS has a bit-packed quality mask (Fmask), which is a little tricky to work with.
# The bits are defined LSB to MSB:
# 
# ```
# 0: cirrus
# 1: cloud
# 2: cloud/shadow adjacent
# 3: cloud shadow
# 4: snow/ice
# 5: water
# 6-7: aerosol level
#    - 11 = high aerosol (3)
#    - 10 = moderate (2)
#    - 01 = low (1)
#    - 00 = climatology aerosol (0)
# ```
# 
# But `np.unpackbits` returns bits MSB-first, so with Python indexing we'll reverse these:
# 
# ```
# 0-1: aerosol level
# 2: water
# 3: snow/ice
# 4: cloud shadow
# 5: cloud adjacent
# 6: cloud
# 7: cirrus
# ```
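# A quick illustration of that reversed ordering (a minimal sketch, not part of the
# original workflow; the value 34 is an arbitrary example):

# In[ ]:

# 0b00100010 = 34: water (LSB bit 5) and cloud (LSB bit 1) set
val = np.array([34], dtype='uint8')
bits = np.unpackbits(val)       # MSB-first: [0, 0, 1, 0, 0, 0, 1, 0]
print('water:', bool(bits[2]))  # reversed index 2 == LSB bit 5
print('cloud:', bool(bits[6]))  # reversed index 6 == LSB bit 1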
# In[45]:

def extract_masks(da):
    '''Given an HLS Fmask DataArray, return all mask arrays as an xarray Dataset'''
    da = da.astype('uint8')
    dsM = da.to_dataset()

    # Unpack each uint8 pixel into 8 bits (MSB-first) -> shape (y, x, 8)
    explode = (da.shape[0], da.shape[1], 8)
    masks = np.unpackbits(da, axis=1).reshape(explode)

    # Aerosol level is a two-bit value (reversed indices 0-1): weight the MSB by 2
    aerosols = masks[:, :, 0] * 2 + masks[:, :, 1]
    high_aerosol = (aerosols == 3)
    moderate_aerosol = (aerosols == 2)
    low_aerosol = (aerosols == 1)

    # Single-bit flags, remembering the reversed (MSB-first) ordering
    masks = masks.astype('bool')
    water = masks[:, :, 2]
    snow = masks[:, :, 3]
    cloud_shadow = masks[:, :, 4]
    cloud_adjacent = masks[:, :, 5]
    cloud = masks[:, :, 6]

    dsM['water'] = (('y', 'x'), water)
    dsM['snow'] = (('y', 'x'), snow)
    dsM['cloud_shadow'] = (('y', 'x'), cloud_shadow)
    dsM['cloud_adjacent'] = (('y', 'x'), cloud_adjacent)
    dsM['cloud'] = (('y', 'x'), cloud)
    dsM['high_aerosol'] = (('y', 'x'), high_aerosol)
    dsM['mod_aerosol'] = (('y', 'x'), moderate_aerosol)
    dsM['low_aerosol'] = (('y', 'x'), low_aerosol)

    dsM = dsM.drop_vars('Fmask')

    return dsM

dsM = extract_masks(ds.Fmask.isel(time=0))
dsM

# In[46]:

daM = dsM.to_array(dim='mask')
#daM.isel(mask=0).plot() # works
daM.plot(col='mask');

# ## Extract NDVI Timeseries

# In[47]:

# Point of interest in the grid's UTM coordinates (meters)
x = 4.28e5
y = 3.843e6

# In[48]:

get_ipython().run_cell_magic('time', '', "\n# NOTE: investigate speeding up with direct cloud access\n# https://nasa-openscapes.github.io/2021-Cloud-Hackathon/tutorials/02_Data_Discovery_CMR-STAC_API.html#write-links-to-file-for-s3-access\n\nts = ds.ndvi.sel(x=x, y=y, method='nearest').compute()\nts\n")

# In[49]:

ts.plot.line('k.');
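# Since `cloud_cover` rides along as a coordinate on `ts`, one quick way to clean
# the series is to keep only low-cloud scenes. A minimal sketch (the 20% threshold
# is an arbitrary choice, not from the original workflow):

# In[ ]:

ts_clear = ts.where(ts.cloud_cover < 20, drop=True)  # drop timestamps with >= 20% cloud cover
ts_clear.plot.line('k.');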