#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().system('pip install -q rasterstats') get_ipython().system('pip install -q odc-stac -U') # In[2]: import pandas as pd import geopandas as gpd import matplotlib.pyplot as plt import numpy as np import rasterstats import rioxarray as rxr import rasterio import adlfs import contextily as ctx from tqdm import tqdm from pathlib import Path # In[3]: #from dask.distributed import Client, LocalCluster #cluster = LocalCluster(processes=False, n_workers=4, local_directory='/tmp') #client = Client(cluster) #client # In[4]: import dask_gateway cluster = dask_gateway.GatewayCluster() client = cluster.get_client() cluster.adapt(minimum=10, maximum=100) print(client.dashboard_link) # In[5]: gf = gpd.read_file('https://raw.githubusercontent.com/scottyhq/snotel/967b0071f3716118bfcae866c740cffb51fbb5f5/snotel-sites.geojson') # In[6]: gf # In[7]: snotel_gf = gf[gf.isActive==True] snotel_gf = snotel_gf[(snotel_gf.beginDate<'2015-01-01') & (snotel_gf.endDate>'2022-01-01')] snotel_gf = snotel_gf[snotel_gf.state!='Alaska'] # In[8]: f,ax=plt.subplots() snotel_gf.plot(ax=ax) ctx.add_basemap(ax=ax, crs=snotel_gf.crs, source=ctx.providers.Stamen.Terrain, attribution=False) # In[9]: years = [2015,2016,2017,2018,2019,2020,2021,2022] #years = [2016,2017,2018,2019,2020,2021,2022] #years = [2017,2018,2019,2020,2021,2022] max_swe_years = [f'max_swe_{year}' for year in years] # In[10]: #for i,row in tqdm(snotel_gf.iterrows()): # try: # snotel_data = pd.read_parquet(f'https://github.com/scottyhq/snotel/raw/main/parquet/{row.code}.parquet').loc[slice(f'{years[0]}-01-01',f'{years[-1]}-12-31')] # snotel_data = snotel_data[(snotel_data.index.month>1)&(snotel_data.index.month<10)] # dates = snotel_data.groupby(snotel_data.index.year).idxmax()['WTEQ'] #print(dates) # if len(dates)==len(years): # snotel_gf.loc[i,max_swe_years] = dates.values # except: # pass #date_index = [] #for date in dates.index.values: # date_index.append(f'max_swe_{date}') 
#snotel_gf = snotel_gf.loc[i,iter(date_index)] = dates.loc[2015:2022] # In[11]: get_ipython().run_cell_magic('time', '', 'my_file = Path("snotel_gf_comparison.geojson")\nif my_file.is_file():\n snotel_gf = gpd.read_file(\'snotel_gf_comparison.geojson\')\nelse:\n for i,row in tqdm(snotel_gf.iterrows()):\n try:\n snotel_data = pd.read_parquet(f\'https://github.com/scottyhq/snotel/raw/main/parquet/{row.code}.parquet\').loc[slice(f\'{years[0]}-01-01\',f\'{years[-1]}-12-31\')]\n snotel_data = snotel_data[(snotel_data.index.month>1)&(snotel_data.index.month<8)]\n\n dates = snotel_data.groupby(snotel_data.index.year).idxmax()[\'WTEQ\']\n dates = dates[dates.notna()]\n #print(dates)\n #if len(dates)==len(years):\n # snotel_gf.loc[i,max_swe_years] = dates.values\n for year in [date for date in dates.index]:\n snotel_gf.loc[i,f\'max_swe_{year}\'] = int(dates.dt.dayofyear[year])\n except:\n pass\n \n #for year in years:\n # snotel_gf[f\'max_swe_{year}\'] = pd.to_datetime(snotel_gf[f\'max_swe_{year}\']).dt.dayofyear\n \n snotel_gf.to_file("snotel_gf_comparison.geojson", driver=\'GeoJSON\')\n') # In[12]: snotel_gf # In[13]: snotel_gf = snotel_gf.to_crs('EPSG:32611') # In[14]: buffer = 500 # at 1000m with 2015 excluded, medians of medians at 38 days, with 500m 34 days, with 200m 36.25 days, with 2000m 37 days # 27 days new method 500m method, 22.0 with 1000m new new method, 23.5 with 500m new new method, 22.0 with 2000m snotel_gf_buff = gpd.GeoDataFrame(snotel_gf.copy(),geometry=snotel_gf.buffer(buffer)) # In[15]: snotel_gf # In[16]: sas_token = 'se=2023-03-13T02%3A45Z&sp=racwdl&sv=2018-11-09&sr=c&skoid=b4d39ccb-ca9c-4e9d-a183-5e0f3ba3e609&sktid=f6b6dd5b-f02f-441a-99a0-162ac5060bd2&skt=2023-03-06T20%3A38%3A54Z&ske=2023-03-13T02%3A45%3A00Z&sks=b&skv=2018-11-09&sig=Nl9p59uFIaJRJgtbHhnVBgYgwh1MnZ/fcJrttie9gAM%3D' tile_names = [tile[-5:] for tile in adlfs.AzureBlobFileSystem(account_name="snowmelt", credential=sas_token, anon=False).ls('snowmelt/eric/MGRS/')] # In[17]: 
# Zonal statistics requested from rasterstats for each buffered station.
stats=['count', 'min', 'max', 'mean', 'median', 'std']

# In[18]:
resolution = 80  # raster posting in meters; appears in the remote filename

# In[19]:
# Manual single-tile smoke test, kept for reference.
#raster = rasterio.open(f"https://snowmelt.blob.core.windows.net/snowmelt/eric/MGRS/{'13UDP'}/runoff_onset_{'13UDP'}_{2016}_median_{resolution}m.tif")
#raster.read(1)

# In[ ]:
# For every station whose MGRS tile exists in blob storage, sample the SAR
# runoff-onset raster (values are day-of-year) within the station's buffer
# and record per-year median and std; the pixel count is recorded once
# (from the 2022 raster) since the buffer footprint is year-independent.
for i,row in tqdm(snotel_gf.iterrows()):
    if row.mgrs in tile_names:
        #print(f'{row.code} in MGRS square {row.mgrs}')
        for year in years:
            try:
                # Context manager closes the remote dataset; the original
                # leaked one open file handle per station-year.
                with rasterio.open(f"https://snowmelt.blob.core.windows.net/snowmelt/eric/MGRS/{row.mgrs}/runoff_onset_{row.mgrs}_{year}_median_{resolution}m.tif") as raster:
                    # Re-project the buffered station into the raster's CRS
                    # before computing zonal statistics.
                    snotel_row_proj = snotel_gf_buff[snotel_gf_buff.index==i].to_crs(raster.crs)
                    statistics = rasterstats.zonal_stats(snotel_row_proj, raster.read(1), affine=raster.transform, stats=stats, nodata=-32768)
                snotel_gf.loc[i,f'sar_runoff_{year}'] = statistics[0]['median']
                snotel_gf.loc[i,f'sar_runoff_{year}_std'] = statistics[0]['std']
                if year == 2022:
                    snotel_gf.loc[i,f'sar_runoff_pixel_count'] = statistics[0]['count']
            except Exception:
                # Deliberately best-effort: a missing tile/year simply leaves
                # NaN for this station-year. Narrowed from a bare `except:`
                # so KeyboardInterrupt/SystemExit still propagate.
                #print(f'Error with tile {row.mgrs} for year {year}')
                pass

# In[ ]:
# Pairwise offset in days: SNOTEL peak-SWE DOY minus SAR runoff-onset DOY.
for year in years:
    snotel_gf[f'offset_{year}'] = snotel_gf[f'max_swe_{year}'] - snotel_gf[f'sar_runoff_{year}']

# In[ ]:
# Per-station summaries across years: median/mean of |offset| over the
# offset_* columns, and the mean of every column ending in 'std'
# (i.e. the per-year zonal std columns).
snotel_gf['median_offset'] = snotel_gf[snotel_gf.columns[pd.Series(snotel_gf.columns).str.startswith('offset')]].abs().median(axis=1)
snotel_gf['mean_offset'] = snotel_gf[snotel_gf.columns[pd.Series(snotel_gf.columns).str.startswith('offset')]].abs().mean(axis=1)
snotel_gf['mean_std'] = snotel_gf[snotel_gf.columns[pd.Series(snotel_gf.columns).str.endswith('std')]].mean(axis=1)

# In[ ]:
snotel_gf.sort_values('mean_std').dropna()  # display cell

# In[ ]:
# Keep stations with plausible sampling noise; presumably mean_std==0 marks
# degenerate/empty samples and >=25 days marks very noisy ones -- TODO confirm.
snotel_gf = snotel_gf[(snotel_gf['mean_std']>0) & (snotel_gf['mean_std']<25)]

# In[ ]:
#snotel_gf.sort_values('mean_offset').dropna().tail(50)
snotel_gf.sort_values('mean_offset').head(20)  # best-agreeing stations

# In[ ]:
snotel_gf.sort_values('sar_runoff_pixel_count').dropna().tail(20)  # largest buffers

# In[27]:
# Per-year comparison: side-by-side histograms of the two date estimates,
# plus the distribution of pairwise offsets.
for year in years:
    f,ax=plt.subplots(1,2,figsize=(10,4))
    # Stations with both estimates available for this year
    # (axis=1 spelled out; the original used positional .all(1)).
    pairwise = snotel_gf[snotel_gf[[f'sar_runoff_{year}',
                                    f'max_swe_{year}']].notnull().all(axis=1)]
    ax[0].hist(pairwise[f'sar_runoff_{year}'],bins=30,alpha=0.5,label='SAR runoff dates')
    ax[0].hist(pairwise[f'max_swe_{year}'],bins=30,alpha=0.5,label='SNOTEL Max SWE')
    ax[0].legend()
    ax[0].set_xlim([0,365])
    ax[0].set_title(f'{year} Runoff Date / Max SWE Agreement')
    ax[1].hist(snotel_gf[f'offset_{year}'],bins=50,label='pairwise offsets')
    ax[1].legend()
    mean = snotel_gf[f'offset_{year}'].mean()
    median = snotel_gf[f'offset_{year}'].median()
    ax[1].set_title(f'{year} Mean offset = {mean:.2f} Median offset = {median:.2f}')
    ax[1].set_xlim([-100,100])

# In[28]:
# Distribution of the per-station summary offsets.
f,ax=plt.subplots(2,1,figsize=(10,10))
snotel_gf['median_offset'].hist(bins=50,ax=ax[0])
snotel_gf['mean_offset'].hist(bins=50,ax=ax[1])
ax[0].set_title('(Peak SNOTEL SWE - SAR Derivied Runoff Onset) 2017-2022 Median Offset for Each Station')
ax[1].set_title('(Peak SNOTEL SWE - SAR Derivied Runoff Onset) 2017-2022 Mean Offset for Each Station')
ax[0].set_xlabel('Offset [Days]')
ax[1].set_xlabel('Offset [Days]')

# In[29]:
snotel_gf['median_offset'].median()
# 25 all years
# 22.5 2017 on

# In[30]:
# Map the per-station median absolute offset.
f,ax=plt.subplots(figsize=(10,10))
#snotel_gf_projected = gpd.GeoDataFrame(snotel_gf,geometry=gf.geometry).to_crs('EPSG:32611')
snotel_gf.plot(column='median_offset',ax=ax,legend=True,cmap='cool',vmin=0,vmax=60)
ax.axis('equal')
ax.set_title('2015-2022 Median Absolute Difference between \nPeak SNOTEL SWE and SAR Derivied Runoff Onset [Days]')
ctx.add_basemap(ax=ax, crs=snotel_gf.crs, source=ctx.providers.Stamen.Terrain, attribution=False)

# In[31]:
# Columns shown in the interactive-map tooltip below.
tooltip_list = ['code','name','elevation_m']
for year in years:
    tooltip_list.append(f'offset_{year}')
    tooltip_list.append(f'sar_runoff_{year}_std')
tooltip_list.extend(['median_offset','mean_offset','sar_runoff_pixel_count'])

# In[32]:
# Drop metadata columns no longer needed for display.
snotel_gf = snotel_gf.drop(['beginDate','endDate','isActive'],axis=1)

# In[33]:
snotel_gf.explore(column='median_offset',cmap='cool',tooltip=tooltip_list,vmin=0,vmax=60)

# In[34]:
f,ax=plt.subplots()
# Does agreement depend on buffer pixel count? (axes from the In[34] figure)
ax.scatter(snotel_gf['sar_runoff_pixel_count'],snotel_gf['median_offset'])
#ax.set_xlim([0,10])

# In[35]:
# ... on station elevation?
f,ax=plt.subplots()
ax.scatter(snotel_gf['elevation_m'],snotel_gf['median_offset'])

# In[36]:
# ... on the SAR sampling noise?
f,ax=plt.subplots()
ax.scatter(snotel_gf['mean_std'],snotel_gf['median_offset'])

# In[37]:
# numeric_only=True: pandas >= 2.0 no longer silently drops non-numeric
# columns (geometry, code, name, state, ...) and would raise a TypeError
# here; on pandas 1.5.x this is explicit about the previous behavior.
corr = snotel_gf.corr(numeric_only=True)
corr.style.background_gradient(cmap='coolwarm')

# In[38]:
snotel_gf.describe()  # display cell

# In[ ]:

# In[ ]:

# In[ ]:

# In[ ]:

# In[54]:
# Map the two modes of the peak-SWE distribution, split at DOY 90.
# NOTE(review): the title says 2017 but the column plotted is max_swe_2019.
f,ax=plt.subplots(figsize=(10,10))
#snotel_gf_projected = gpd.GeoDataFrame(snotel_gf,geometry=gf.geometry).to_crs('EPSG:32611')
snotel_gf[snotel_gf.max_swe_2019<90].plot(ax=ax,legend=True)
snotel_gf[snotel_gf.max_swe_2019>90].plot(ax=ax,legend=True)
ax.axis('equal')
ax.set_title('Checking out the bimodal distribution of 2017 peak SWE')
ctx.add_basemap(ax=ax, crs=snotel_gf.crs, source=ctx.providers.Stamen.Terrain, attribution=False)

# In[55]:
# Same split, but peak-SWE DOY against elevation.
# NOTE(review): title again says 2017 while the data is max_swe_2019.
f,ax=plt.subplots(figsize=(10,10))
#snotel_gf_projected = gpd.GeoDataFrame(snotel_gf,geometry=gf.geometry).to_crs('EPSG:32611')
#snotel_gf[snotel_gf.max_swe_2017<80].plot(ax=ax,x='elevation_m',y='max_swe_2017')
#snotel_gf[snotel_gf.max_swe_2017>80].plot(ax=ax,x='elevation_m',y='max_swe_2017')
ax.scatter(snotel_gf[snotel_gf.max_swe_2019<90].elevation_m,snotel_gf[snotel_gf.max_swe_2019<90].max_swe_2019)
ax.scatter(snotel_gf[snotel_gf.max_swe_2019>90].elevation_m,snotel_gf[snotel_gf.max_swe_2019>90].max_swe_2019)
ax.set_ylim([0,200])
#ax.axis('equal')
ax.set_title('2017 SNOTEL DOY of Peak SWE vs Elevation')
#ctx.add_basemap(ax=ax, crs=snotel_gf.crs, source=ctx.providers.Stamen.Terrain, attribution=False)

# In[ ]: