# need to upgrade odc-stac to fix keyerror issue for some tiles
#!pip install -q odc-stac -U
# IPython shell magic (this file is a notebook export): pin odc-stac to 0.3.6.
!pip install -q odc-stac==0.3.6
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio as rio
import pystac
import pystac_client
import stackstac
import matplotlib.pyplot as plt
import xarray as xr
from datetime import datetime
import matplotlib
import ipyleaflet
import sys
import os
import dask_gateway
import planetary_computer
from rechunker import rechunk
sys.path.append('../sar_snowmelt_timing')
import s1_rtc_bs_utils
import contextily as ctx
import rioxarray as rxr
import skimage
import pathlib
import glob
import re
import time
import fsspec
import urllib.request
from urllib.error import URLError
from tqdm import tqdm
import getpass
import azure.storage.blob
import io
import adlfs
#sas_token = getpass.getpass() # prompts for the sas_token
# SECURITY NOTE(review): a SAS token is hard-coded below (expiry se=2023-12-08).
# Committed credentials should be rotated and supplied via getpass/env vars
# (the commented-out getpass line above) rather than checked into source.
sas_token = "se=2023-12-08T18%3A45Z&sp=racwdl&sv=2022-11-02&sr=c&skoid=598b2582-4d1d-4c4e-9bb2-3ad571b791b5&sktid=f6b6dd5b-f02f-441a-99a0-162ac5060bd2&skt=2023-12-01T18%3A46%3A01Z&ske=2023-12-08T18%3A45%3A00Z&sks=b&skv=2022-11-02&sig=up1iyCiyySNtrQnG%2BFb7yCIshOTLZ%2BFq9XphTbDeLxo%3D"
# Azure blob container client used to upload the COG rasters in the main loop.
container_client = azure.storage.blob.ContainerClient(
    "https://snowmelt.blob.core.windows.net",
    container_name="snowmelt",
    credential=sas_token,
)
# fsspec-compatible view of the same container, used for glob/open below.
fs = adlfs.AzureBlobFileSystem(account_name="snowmelt", credential=sas_token, anon=False)
# Alternative adaptive-scaling setup, kept for reference:
# cluster = dask_gateway.GatewayCluster()
# client = cluster.get_client()
# cluster.adapt(minimum=10, maximum=50)
# print(client.dashboard_link)
# Start a Dask Gateway cluster with a fixed pool of 50 workers.
cluster = dask_gateway.GatewayCluster()
client = cluster.get_client()
cluster.scale(50)
print(cluster.dashboard_link)
# Load the global MGRS land-tile grid with per-tile snow-area statistics.
with fsspec.open('https://github.com/egagli/mgrs_tiles_snow_area/raw/main/mgrs_land_tiles_snow_area.parquet') as file:
    mgrs_gdf = gpd.read_parquet(file)
# Keep only tiles with >100 km^2 of mountain seasonal/ephemeral snow, largest first.
mgrs_snow_gdf = mgrs_gdf[mgrs_gdf['mountain_seasonal_and_ephemeral_snow_area_km2']>100].sort_values(by='mountain_seasonal_and_ephemeral_snow_area_km2',ascending=False)
# EPSG >= 32700 are UTM-south zones, so this filter keeps the northern hemisphere.
mgrs_snow_gdf = mgrs_snow_gdf[mgrs_snow_gdf.epsg<32700] # northern hemisphere
#mgrs_snow_gdf = mgrs_snow_gdf[mgrs_snow_gdf.intersects(gpd.read_file('../input/shapefiles/western_us_canada.geojson').geometry[0],align=True)] # western us and canada
# Notebook display + interactive map of the selected tiles.
mgrs_snow_gdf
mgrs_snow_gdf.explore(column='mountain_seasonal_and_ephemeral_snow_area_km2')
# nm = mgrs_snow_gdf.iloc[3000].tile
# bbox_gdf = mgrs_gdf[mgrs_gdf.tile==nm]
# f,ax=plt.subplots()
# bbox_gdf.geometry.boundary.plot(ax=ax,color='red')
# xmin, ymin, xmax, ymax = bbox_gdf.total_bounds
# ax.set_xlim([xmin-10,xmax+10])
# ax.set_ylim([ymin-10,ymax+10])
# ctx.add_basemap(ax=ax,source=ctx.providers.OpenTopoMap, crs=bbox_gdf.crs)
# classes = [ # page 13 of https://esa-worldcover.s3.amazonaws.com/v100/2020/docs/WorldCover_PUM_V1.0.pdf
# # 10, # treecover
# 20, # shrubland
# 30, # grassland
# 40, # cropland
# # 50, # built-up
# 60, #bare / sparse vegetation
# 70, # snow and ice
# # 80, # permanent water bodies
# 90, # herbaceous wetlands
# 95, # mangroves
# 100 # moss and lichen
# ]
# worldcover = s1_rtc_bs_utils.get_worldcover(ts_ds)
#ts_ds = ts_ds.where(worldcover.isin(classes))
# Output grid resolution (meters) for the runoff-onset rasters.
resolution = 80
# Minimum S1 acquisitions per orbit during the melt season for a pixel to count.
# (Keeps the original spelling because the name is referenced throughout below.)
min_acqusitions = 6
# Output layer names, one raster per entry.
# FIX: 'all_years_polyfit' was inconsistent with the 'allyears_polyfit' branch
# name used when selecting which raster to write; normalized here.
years = ['allyears_median','allyears_std','allyears_polyfit','allyears_corr_strength','2015_median','2016_median','2017_median','2018_median','2019_median','2020_median','2021_median','2022_median','2023_median']
def get_runoff_onset_mgrs(ts_ds,return_seperate_orbits_and_polarizations=False, num_acquisitions_during_melt_season=6):
    """Estimate the per-pixel runoff-onset time from an S1 RTC backscatter stack.

    The onset is taken as the time of minimum backscatter within each relative
    orbit; pixels with fewer than ``num_acquisitions_during_melt_season`` valid
    observations in an orbit are masked out.

    Parameters
    ----------
    ts_ds : xarray object with a ``time`` dimension, a ``band`` dimension, and a
        ``sat:relative_orbit`` coordinate (assumed from usage -- TODO confirm
        against s1_rtc_bs_utils.get_s1_rtc_stac_odc_pc).
    return_seperate_orbits_and_polarizations : bool
        If False (default) collapse the orbit and band dimensions with a median;
        if True keep per-orbit/per-band results. (Param name keeps the original
        misspelling because callers pass it by keyword.)
    num_acquisitions_during_melt_season : int
        Minimum acquisitions per orbit for a pixel to be kept.

    Returns
    -------
    Runoff-onset times as ``datetime64[ns]``; masked pixels become NaT.
    """
    #orbits = s1_rtc_bs_utils.get_orbits_with_melt_season_coverage(ts_ds,num_acquisitions_during_melt_season=num_acquisitions_during_melt_season)
    #ts_ds = ts_ds[ts_ds['sat:relative_orbit'].isin(orbits).compute()]
    ts_ds = s1_rtc_bs_utils.remove_border_noise(ts_ds)
    # Valid-observation count per orbit, used to mask sparsely observed pixels.
    counts = ts_ds.groupby('sat:relative_orbit').count()
    # idxmin over time = date of minimum backscatter per orbit; cast to int64
    # (nanoseconds since epoch) so .where() can introduce NaN for masked pixels.
    runoffs_int64 = ts_ds.groupby('sat:relative_orbit').map(lambda c: c.idxmin(dim='time')).astype(np.int64).where(counts>=num_acquisitions_during_melt_season)
    if return_seperate_orbits_and_polarizations==False:
        # Negative int64 values are NaT/fill placeholders, so keep only positive
        # timestamps before collapsing orbits and polarizations with a median.
        runoffs_int64 = runoffs_int64.where(runoffs_int64>0).median(dim=['sat:relative_orbit','band'],skipna=True)
    return runoffs_int64.astype('datetime64[ns]')
# Main processing loop: for each snowy MGRS tile, compute runoff-onset rasters
# and upload them as COGs to Azure blob storage (native UTM CRS + EPSG:4326).
for tile in tqdm(mgrs_snow_gdf.tile):
    # Skip tiles that already have output.
    # FIX: the original globbed 'snowmelt/eric/global_4326/' with no wildcard,
    # which returns at most the folder path itself, so the existence check never
    # matched and completed tiles were reprocessed every run.
    if any(tile in s for s in fs.glob('snowmelt/eric/global_4326/*')):
        print(f'folder {tile} already exists, skipping...')
        continue
    # Skip tiles recorded as failures on a previous run.
    # FIX: use a context manager so the remote file handle is closed
    # (the original called fs.open(...).readlines() and leaked the handle).
    with fs.open('snowmelt/eric/global/exclude.txt','r') as exclude_file:
        exclude_lines = exclude_file.readlines()
    if any(tile in s for s in exclude_lines):
        print(f'{tile} is in exclude.txt, skipping...')
        continue
    print(f'working on tile {tile}...')
    try:
        tic = time.perf_counter()
        bbox_gdf = mgrs_snow_gdf[mgrs_snow_gdf.tile==tile]
        # Full 2015-2023 S1 RTC backscatter stack in the tile's native UTM EPSG.
        ts_ds = s1_rtc_bs_utils.get_s1_rtc_stac_odc_pc(bbox_gdf,start_time='2015-01-01',end_time='2023-12-31',resolution=resolution,epsg=f'EPSG:{bbox_gdf.epsg.values[0]}')
        # Restrict to the Feb-Jul melt season and compute runoff onset per year.
        allyears = ts_ds.where((ts_ds['time.month']>=2) & (ts_ds['time.month']<8), drop=True).groupby('time.year').apply(
            get_runoff_onset_mgrs,return_seperate_orbits_and_polarizations=False, num_acquisitions_during_melt_season=min_acqusitions)
        # Day-of-year per year; int16 min serves as the encoded nodata value.
        runoffs_allyears = allyears.dt.dayofyear.where(allyears.dt.dayofyear>0).compute().dropna(dim='year',how='all').rio.write_nodata(np.iinfo(np.int16).min,encoded=True)
        # Linear trend of onset day vs. year (slope -> 'allyears_polyfit' layer).
        polyfits = runoffs_allyears.polyfit('year',deg=1,full=True,cov=True)
        years = ['allyears_median','allyears_std','allyears_polyfit','allyears_corr_strength','2015_median','2016_median','2017_median','2018_median','2019_median','2020_median','2021_median','2022_median','2023_median']
        for year in years:
            #print(f'writing {year}...')
            # Statistics layers are float32; per-year and median layers are int16.
            if year == 'allyears_std':
                data_type = 'float32'
                raster = runoffs_allyears.std(dim='year').rio.write_nodata(np.finfo(np.float32).min,encoded=True)
            elif year == 'allyears_polyfit':
                data_type = 'float32'
                raster = polyfits.polyfit_coefficients[0].rio.write_nodata(np.finfo(np.float32).min,encoded=True).rio.set_crs(runoffs_allyears.rio.crs)
            elif year == 'allyears_corr_strength':
                data_type = 'float32'
                raster = xr.corr(runoffs_allyears.year,runoffs_allyears,dim='year').rio.write_nodata(np.finfo(np.float32).min,encoded=True).rio.set_crs(runoffs_allyears.rio.crs)
            elif year == 'allyears_median':
                data_type = 'int16'
                raster = runoffs_allyears.median(dim='year').rio.write_nodata(np.iinfo(np.int16).min,encoded=True)
            else:
                data_type = 'int16'
                int_year = int(year[0:4])
                # Some years may have been dropped (all-NaN); skip those layers.
                if int_year in runoffs_allyears.year:
                    raster = runoffs_allyears.sel(year=int_year)
                else:
                    continue
            # actually changing the resampling here might be helpful
            # Write the native-CRS COG to an in-memory buffer and upload it.
            with io.BytesIO() as buffer:
                raster.rio.to_raster(buffer, driver="COG", dtype=data_type)
                buffer.seek(0)
                blob_client = container_client.get_blob_client(f"eric/global/{tile}/runoff_onset_{tile}_{year}_{resolution}m.tif")
                blob_client.upload_blob(buffer, overwrite=True)
            # Same raster reprojected to EPSG:4326 for the web-friendly copy.
            with io.BytesIO() as buffer:
                raster.rio.reproject('epsg:4326').rio.to_raster(buffer, driver="COG", dtype=data_type)
                buffer.seek(0)
                blob_client = container_client.get_blob_client(f"eric/global_4326/{tile}/runoff_onset_{tile}_{year}_{resolution}m_4326.tif")
                blob_client.upload_blob(buffer, overwrite=True)
        toc = time.perf_counter()
        print(f'Processing time: {toc - tic:0.1f} seconds')
    except Exception as e:
        # Best-effort failure handling: record the tile in exclude.txt so it is
        # skipped on later runs, then continue with the next tile. Note this
        # also excludes tiles that failed for transient reasons.
        print(e)
        exclude = tile
        with io.BytesIO() as buffer:
            with fs.open('snowmelt/eric/global/exclude.txt','r',newline='\n') as exclude_file:
                exclude_list = exclude_file.readlines()
            exclude_list = [x.replace('\n','') for x in exclude_list]
            exclude_list.append(exclude)
            np.savetxt(buffer,exclude_list, fmt='%s',delimiter='\n')
            buffer.seek(0)
            blob_client = container_client.get_blob_client('eric/global/exclude.txt')
            blob_client.upload_blob(buffer, overwrite=True)
        print(f'Failed, added {exclude} to exclude.txt')
# Quick visual sanity check of the outputs for the last processed tile.
fs.glob(f'snowmelt/eric/global_4326/{tile}/*')
url = "https://snowmelt.blob.core.windows.net/"
# Per-year median rasters in the tile's native CRS.
geotiff_list = fs.glob(f'snowmelt/eric/global/{tile}/*_????_median*')
# FIX: use a raw string for the regex -- "_(\d{4})_" contains an invalid escape
# sequence (DeprecationWarning today, a SyntaxError in future Python versions).
year_list = [int(re.findall(r"_(\d{4})_", geotiff)[0]) for geotiff in geotiff_list]
time_var = xr.Variable('time', year_list)
# Stack the per-year rasters along a 'time' dimension for faceted plotting.
runoff_allyears_vis = xr.concat([rxr.open_rasterio(f"{url}{i}") for i in geotiff_list],dim=time_var).squeeze().sortby('time')
# Mask the int16 nodata fill value.
runoff_allyears_vis = runoff_allyears_vis.where(runoff_allyears_vis>-32768)
runoff_allyears_vis.plot(col='time',col_wrap=3, vmin=80, vmax=240)
#runoff_allyears_vis.median(dim='time').where(runoff_allyears_vis.median(dim='time')>0).rio.write_nodata(-32768,encoded=True).plot(vmin=80,vmax=240)
rxr.open_rasterio(f'{url}snowmelt/eric/global_4326/47WMQ/runoff_onset_47WMQ_allyears_polyfit_80m_4326.tif',mask_and_scale=True).plot()
url = "https://snowmelt.blob.core.windows.net/"
geotiff_list = fs.glob(f'snowmelt/eric/global_4326/{tile}/*')
year_list = []
for geotiff in geotiff_list:
year_list.append(re.findall("_(\d{4})_", geotiff)[0])
year_list = [int(year) for year in year_list]
time_var = xr.Variable('time', year_list)
runoff_allyears_vis = xr.concat([rxr.open_rasterio(f"{url}{i}") for i in geotiff_list],dim=time_var).squeeze().sortby('time')
runoff_allyears_vis = runoff_allyears_vis.where(runoff_allyears_vis>-32768)
runoff_allyears_vis.plot(col='time',col_wrap=3, vmin=80, vmax=240)
runoff_allyears_vis.median(dim='time').where(runoff_allyears_vis.median(dim='time')>0).rio.write_nodata(-32768,encoded=True).plot(vmin=80,vmax=240)
#fn = f"{url}{fs.glob('snowmelt/eric/global/04WFA/')[3]}"
fn = f"{url}snowmelt/eric/global_4326/47WMQ/runoff_onset_47WMQ_allyears_std_80m_4326.tif"
fn = f"{url}snowmelt/eric/global_4326/47WMQ/runoff_onset_47WMQ_2021_median_80m_4326.tif"
fn
!gdalinfo $fn
# ts_ds = s1_rtc_bs_utils.get_s1_rtc_stac_odc_pc(bbox_gdf,start_time='2015-01-01',end_time='2023-12-31',resolution=resolution,epsg=f'EPSG:{bbox_gdf.epsg.values[0]}')
# allyears = ts_ds.where((ts_ds['time.month']>=2) & (ts_ds['time.month']<8), drop=True).groupby('time.year').apply(
# get_runoff_onset_2,return_seperate_orbits_and_polarizations=False, num_acquisitions_during_melt_season=min_acqusitions)
# %%time
# runoffs_allyears = allyears.dt.dayofyear.where(allyears.dt.dayofyear>0).compute().dropna(dim='year',how='all')
# runoffs_allyears.plot(col='year',col_wrap=4,vmin=80,vmax=240)
# runoffs_allyears#.rio.write_nodata(-32768,encoded=True)
# min_acqusitions = 8
# counts = ts_ds_oneyear.groupby('sat:relative_orbit').count()
# counts
# runoffs_median = s1_rtc_bs_utils.get_runoff_onset(ts_ds_oneyear,return_seperate_orbits_and_polarizations=True,num_acquisitions_during_melt_season=8)
# runoffs_median
# runoffs_median_masked = runoffs_median.where(counts>min_acqusitions)
# runoffs_median_masked
# runoffs_median_masked_compute = runoffs_median_masked.dt.dayofyear.where(runoffs_median_masked.dt.dayofyear>0).compute()
# runoffs_median_masked_compute
# runoffs_median_masked_compute.plot(col='band',row='sat:relative_orbit',vmin=80,vmax=240)
# runoffs_median_masked_compute.median(dim=['sat:relative_orbit','band'],skipna=True).plot(vmin=80,vmax=240)
# year = 2020
# melt_year = slice(f'{year}-02-01',f'{year}-08-01')
# ts_ds_oneyear = ts_ds.sel(time=melt_year)
# ts_ds_oneyear
#runoffs = get_runoff_onset_mgrs(ts_ds_oneyear, return_seperate_orbits_and_polarizations=True, num_acquisitions_during_melt_season=min_acqusitions).compute()
#runoffs.dt.dayofyear.where(runoffs.dt.dayofyear>0).plot(col='band',row='sat:relative_orbit',vmin=80,vmax=240)
# runoffs_med = get_runoff_onset_2(ts_ds_oneyear, return_seperate_orbits_and_polarizations=False, num_acquisitions_during_melt_season=min_acqusitions).compute()
# runoffs_med.dt.dayofyear.where(runoffs_med.dt.dayofyear>0).plot(vmin=80,vmax=240)
#read the exclude file
# with fs.open('snowmelt/eric/global/exclude.txt','r',newline='\n') as exclude_file:
# exclude_list = exclude_file.readlines()
# exclude_list = [x.replace('\n','') for x in exclude_list]
# exclude_list
#WARNING THIS LINE RESETS THE EXCLUDE FILE
##############fs.touch('snowmelt/eric/global/exclude.txt')