I’ll be working with python, so I will use ESDA: Exploratory Spatial Data Analysis module of PySAL (Python Spatial Analysis Library) for Moran’s I, and SciKit GStat for variograms. Good stuff on spatial autocorrelation using python here: https://geographicdata.science/book/notebooks/06_spatial_autocorrelation.html
import numpy as np
import pandas as pd
import geopandas as gpd
from dask.distributed import Client
import py3dep
import geopandas as gpd
import rasterio as rio
import pystac
import pystac_client
import stackstac
import math
import shapely
import matplotlib.pyplot as plt
import os
import xarray as xr
from datetime import datetime
import ulmo
from datetime import datetime
import sys
from pysal.explore import esda # Exploratory Spatial analytics
from pysal.lib import weights
from pysal.viz import splot
from splot.esda import plot_moran
import seaborn
import skgstat as skg
sys.path.append('../sar_snowmelt_timing')
import s1_rtc_bs_utils
/mnt/working/egagli/sw/miniconda3/envs/aws-rtc-stac2/lib/python3.8/site-packages/spaghetti/network.py:36: FutureWarning: The next major release of pysal/spaghetti (2.0.0) will drop support for all ``libpysal.cg`` geometries. This change is a first step in refactoring ``spaghetti`` that is expected to result in dramatically reduced runtimes for network instantiation and operations. Users currently requiring network and point pattern input as ``libpysal.cg`` geometries should prepare for this simply by converting to ``shapely`` geometries. warnings.warn(f"{dep_msg}", FutureWarning)
# GDAL environment variables for better performance
os.environ['AWS_REGION']='us-west-2'
os.environ['GDAL_DISABLE_READDIR_ON_OPEN']='EMPTY_DIR'
os.environ['AWS_NO_SIGN_REQUEST']='YES'
#
xr.set_options(keep_attrs=True)
# Paste /proxy/localhost:8787 for cluster diagnostics
client = Client(local_directory='/tmp', processes=False)
client
Client-17960e7b-150b-11ed-b78b-04421a0b914b
Connection method: Cluster object | Cluster type: distributed.LocalCluster |
Dashboard: http://172.25.45.79:8787/status |
1281ce92
Dashboard: http://172.25.45.79:8787/status | Workers: 1 |
Total threads: 32 | Total memory: 125.71 GiB |
Status: running | Using processes: False |
Scheduler-20295bc3-58f9-4674-9caf-9963a13b91a2
Comm: inproc://172.25.45.79/46987/1 | Workers: 1 |
Dashboard: http://172.25.45.79:8787/status | Total threads: 32 |
Started: Just now | Total memory: 125.71 GiB |
Comm: inproc://172.25.45.79/46987/4 | Total threads: 32 |
Dashboard: http://172.25.45.79:46739/status | Memory: 125.71 GiB |
Nanny: None | |
Local directory: /tmp/dask-worker-space/worker-3iuodmoa |
rainier_bbox_gdf = gpd.read_file('../input/shapefiles/mt_rainier.geojson')
rainier_sar_ds=s1_rtc_bs_utils.get_s1_rtc_stac(rainier_bbox_gdf,start_time='2020-01-01',end_time='2021-01-01',orbit_direction='ascending',polarization='gamma0_vv',collection='../input/sentinel1-rtc-aws/mycollection_rainier.json')
%%time
# compute frames, drop missing data
rainier_sar_ds = rainier_sar_ds.compute()
rainier_sar_ds = rainier_sar_ds.dropna('time',how='all')
CPU times: user 5.07 s, sys: 616 ms, total: 5.69 s Wall time: 4.34 s
f,axs=plt.subplots(6,10,figsize=(30,16))
for i,ax in enumerate(axs.flatten()):
rainier_sar_ds.isel(time=i).plot(ax=ax,vmax=1.5,cmap='gray',add_colorbar=False)
ax.set_xlabel('')
ax.set_ylabel('')
ax.set_xticklabels('')
ax.set_yticklabels('')
#ax.set_aspect('equal')
ax.set_title(f'DOY = {rainier_sar_ds.isel(time=i).time.dt.dayofyear.values}')
plt.tight_layout(rect=[0, 0, 1, 0.95])
f.suptitle('Sentinel-1 SAR Backscatter Timeseries \n Mt. Rainier')
Text(0.5, 0.98, 'Sentinel-1 SAR Backscatter Timeseries \n Mt. Rainier')
dem = s1_rtc_bs_utils.get_py3dep_dem(rainier_sar_ds)
aspect = s1_rtc_bs_utils.get_py3dep_aspect(rainier_sar_ds)
slope = s1_rtc_bs_utils.get_py3dep_slope(rainier_sar_ds)
dah = s1_rtc_bs_utils.get_dah(rainier_sar_ds)
f,ax=plt.subplots(1,4,figsize=(25,5))
dem.plot(ax=ax[0])
aspect.plot(ax=ax[1])
slope.plot(ax=ax[2])
dah.plot(ax=ax[3])
ax[0].set_title('py3dep 10m DEM \n Mt. Rainier')
ax[1].set_title('py3dep 10m Aspect \n Mt. Rainier')
ax[2].set_title('py3dep 10m Slope \n Mt. Rainier')
ax[3].set_title('10m Diurnal Anisotropic Heating Index (DAH) \n Mt. Rainier')
Text(0.5, 1.0, '10m Diurnal Anisotropic Heating Index (DAH) \n Mt. Rainier')
summer_ndvi_ds = s1_rtc_bs_utils.get_median_ndvi(rainier_sar_ds)
stats_full_df = s1_rtc_bs_utils.get_stats(rainier_sar_ds,dem=dem,aspect=aspect,slope=slope,dah=dah)
stats_ndvi_mask_df = s1_rtc_bs_utils.get_stats(rainier_sar_ds.where(summer_ndvi_ds<0.2,drop=True),dem=dem,aspect=aspect,slope=slope,dah=dah)
rainier_glaciers = gpd.read_file('../input/shapefiles/rainier_glaciers.geojson').to_crs(rainier_sar_ds.crs)
stats_glacier_mask_df = s1_rtc_bs_utils.get_stats(rainier_sar_ds.rio.clip(rainier_glaciers.geometry,rainier_glaciers.crs,drop=False,invert=False),dem=dem,aspect=aspect,slope=slope,dah=dah)
stats_full_ds = stats_full_df.to_xarray()
stats_ndvi_mask_ds = stats_ndvi_mask_df.to_xarray()
stats_glacier_mask_ds = stats_glacier_mask_df.to_xarray()
f,ax=plt.subplots(1,3,figsize=(30,8))
stats_full_ds['runoff_dates'].plot(ax=ax[0],cmap='twilight')
stats_ndvi_mask_ds['runoff_dates'].plot(ax=ax[1],cmap='twilight')
stats_glacier_mask_ds['runoff_dates'].plot(ax=ax[2],cmap='twilight')
ax[0].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nMt. Rainier')
ax[1].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nMt. Rainier NDVI Mask')
ax[2].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nMt. Rainier Glacier Mask')
Text(0.5, 1.0, 'Backscatter-derived Snowmelt Runoff Onset DOY \nMt. Rainier Glacier Mask')
As an aside, we are using DOY values and a cyclical colormap. 0,365 are adjacent in true value for our analysis, but also represent the points in our domain with the greatest mathematical difference / euclidean distance. This will be problematic for regressions, interpolations, statistics, etc. Suggestions here on ways to rescale or represent the data are very welcome.
def get_semivariogram(stats_df,variable,sampling_frac=.001,n_lags=30,maxlag=None):
#https://guillaumeattard.com/geostatistics-applied-to-hydrogeology-with-scikit-gstat/
#https://scikit-gstat.readthedocs.io/en/latest/userguide/variogram.html
#https://aegis4048.github.io/spatial-simulation-1-basics-of-variograms
#https://www.aspexit.com/variogram-and-spatial-autocorrelation/
stats_df = stats_df.sample(frac=sampling_frac)
V = skg.Variogram(list(zip(stats_df.geometry.y, stats_df.geometry.x)), stats_df[variable],n_lags=n_lags,maxlag=maxlag,use_nugget=True)
return V
f,ax=plt.subplots(2,3,figsize=(30,8),sharey=False,gridspec_kw={'height_ratios': [1, 6]})
V_full = get_semivariogram(stats_full_df,'runoff_dates',sampling_frac=0.03);
V_full.plot(axes=ax[::-1,0]);
V_ndvi = get_semivariogram(stats_ndvi_mask_df,'runoff_dates',sampling_frac=0.03);
V_ndvi.plot(axes=ax[::-1,1]);
V_gm = get_semivariogram(stats_glacier_mask_df,'runoff_dates',sampling_frac=0.03);
V_gm.plot(axes=ax[::-1,2]);
#V.scattergram();
#V.distance_difference_plot();
#variogram interpreataion http://www.statios.com/resources/04-variogram.pdf
Lots to talk about! The experimental variograms all look very different. Specifically, we notice there is not a well defined sill for any of them. This could be due to me not choosing a cutoff / max lag distance, but I think the variogram behavior at the farthest lags is quite interesting. For the entire scene, our variogram looks exponential. It is likely the unmasked areas are not displaying a snowmelt signal and are likely controlled by other dynamics and are thus quite chaotic at the farthest reaches, so pairwise variances with the largest lags will be very high. I think fixing my representation of DOY as [0,365] might play a role in this. The glacier masked variogram actually does the opposite at the farthest spatial lags and dips quite dramatically. I think this is due to the radial symmetry present in the data due to topography… the biggest spatial lags are all at low elevations, and low elevations tend to have similar snowmelt date values. The NDVI masked scene variogram has characteristics of both the unmasked and the glacier masked scene, with combined exponential behavior and a large dip. The glacier masked scene’s variogram displays the most normal variogram behavior. It looks like the nugget has a variance value of 1050, and the range is about 5000m or 5km. This hints that on average for this particular glacier masked scene, within 5km snowmelt runoff onset date tends to be correlated.
def get_morans_local_i(stats_df,sampling_frac=.001):
# to avoid spatial autocorrelation we must sample
# https://www.statisticshowto.com/morans-i/
# https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2018WR022553
# calculating moran's i for the sample
# https://geographicdata.science/book/notebooks/07_local_autocorrelation.html
stats_df = stats_df.sample(frac=sampling_frac)
w = weights.distance.KNN.from_dataframe(stats_df, k=8)
# Row-standardization
w.transform = 'R'
lisa = esda.moran.Moran_Local(stats_df['runoff_dates'], w)
stats_df['moran_i'] = lisa.Is
stats_df['moran_i_p_val'] = lisa.p_sim
stats_df['moran_i_p_val_sig'] = stats_df['moran_i_p_val'] < 0.05
return stats_df, lisa
def plot_morans_local_i(stats_df,sampling_frac=0.01):
stats_ds = stats_df.to_xarray()
stats_df,lisa=get_morans_local_i(stats_df,sampling_frac=sampling_frac)
seaborn.kdeplot(lisa.Is,ax=ax[0])
seaborn.rugplot(lisa.Is,ax=ax[0])
ax[0].set_title('Morans Local I PDF')
stats_ds['runoff_dates'].plot(ax=ax[1],cmap='Greys_r',add_colorbar=False)
#stats_full_df.plot.scatter(x='x',y='y',c='white',ax=ax[1])
#stats_full_ds['moran_i'].plot(vmin=-1,vmax=1)
stats_df.reset_index().plot.scatter(x='x',y='y',c='moran_i', cmap='coolwarm',s=10,legend=True,ax=ax[1],vmin=-1,vmax=1)
ax[1].set_title('Spatial Distribution of Morans Local I')
stats_ds['runoff_dates'].plot(ax=ax[2],cmap='Greys_r')
stats_df[stats_df['moran_i_p_val_sig']].reset_index().plot.scatter(x='x',y='y',c='r',legend=True,ax=ax[2],s=10)
stats_df[~stats_df['moran_i_p_val_sig']].reset_index().plot.scatter(x='x',y='y',c='b',legend=True,ax=ax[2],s=10)
ax[2].set_title('Moran I Significance \n (Red=Significant, Blue=Not Significant)') # red is spatially correlated, blue is not
#ax[0].set_aspect('equal')
ax[1].set_aspect('equal')
ax[2].set_aspect('equal')
ax[1].set_xticklabels('')
ax[1].set_yticklabels('')
ax[2].set_xticklabels('')
ax[2].set_yticklabels('')
ax[1].set_xlabel('')
ax[1].set_ylabel('')
ax[2].set_xlabel('')
ax[2].set_ylabel('')
def get_morans_global_i(stats_df,sampling_frac=1):
stats_df = stats_df.sample(frac=sampling_frac)
w = weights.distance.KNN.from_dataframe(stats_df, k=8)
# Row-standardization
w.transform = 'R'
moran = esda.moran.Moran(stats_df['runoff_dates'], w)
#stats_df['moran_i'] = lisa.Is
#stats_df['moran_i_p_val'] = lisa.p_sim
#stats_df['moran_i_p_val_sig'] = stats_df['moran_i_p_val'] < 0.05
return moran
def plot_morans_global_i(stats_df,sampling_frac=.001):
#https://geographicdata.science/book/notebooks/06_spatial_autocorrelation.html
#mi = get_morans_global_i(stats_glacier_mask_df,sampling_frac=1)
#seaborn.kdeplot(mi.sim, shade=True)
#plt.vlines(mi.I, 0, 1, color='r')
#plt.vlines(mi.EI, 0,1)
#plt.xlabel("Moran's I")
moran = get_morans_global_i(stats_df,sampling_frac=sampling_frac)
plot_moran(moran)
plot_morans_global_i(stats_full_df,sampling_frac=1)
plot_morans_global_i(stats_ndvi_mask_df,sampling_frac=1)
plot_morans_global_i(stats_glacier_mask_df,sampling_frac=1) #hmmmmmm such high spatial autocorrelation, yet low correlation between (elevation,insolation) and snowmelt runoff date
Moran’s Global I is incredibly large for all 3 scenes. The reference distribution is not even visible on the left hand plots. This signals to us that these scenes are insanely spatially autocorrelated, as expected. Moran’s I gets progressively larger with the progressive masking, which makes sense (by inspection the glacier masked scene has smoother spatial trends and clustering). Let’s take a peak a Moran’s Global I for some more granular analysis.
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_full_df,sampling_frac=0.001) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
plt.tight_layout()
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_ndvi_mask_df,sampling_frac=0.002) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
plt.tight_layout()
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_glacier_mask_df,sampling_frac=0.003) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
plt.tight_layout()
Moran’s Local I shows similar areas of high spatial autocorrelation between the three scenes. The top of the mountain is shown to be highly spatially autocorrelated in all scenes. In the unmasked scene, we even see areas of significance that are highly anticorrelated in the vegetated areas. Again, this makes sense because it is likely not snowmelt dynamics controlling backscatter minima in these areas. Areas low on the glacier have progressively higher Moran’s I values as we progressively mask. This likely has to do with the adjacent chaotic areas in the unmasked scene contributing to lowering Moran’s I values adjacent to these chaotic areas. Let’s peak at the correlations between land surface characteristics and snowmelt date.
stats_full_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | -0.072958 | 0.152420 | -0.020688 | 0.308848 | -0.145129 | 0.983107 | -0.846308 |
aspect | -0.072958 | 1.000000 | 0.037673 | 0.258907 | -0.013517 | -0.117997 | -0.118847 | -0.075399 |
slope | 0.152420 | 0.037673 | 1.000000 | 0.042553 | 0.070794 | -0.000618 | 0.141477 | -0.153347 |
dah | -0.020688 | 0.258907 | 0.042553 | 1.000000 | -0.063877 | -0.088327 | -0.203329 | -0.515071 |
runoff_dates | 0.308848 | -0.013517 | 0.070794 | -0.063877 | 1.000000 | -0.076210 | 0.314154 | -0.230750 |
ripening_dates | -0.145129 | -0.117997 | -0.000618 | -0.088327 | -0.076210 | 1.000000 | -0.125958 | 0.171485 |
runoff_prediction | 0.983107 | -0.118847 | 0.141477 | -0.203329 | 0.314154 | -0.125958 | 1.000000 | -0.734513 |
ripening_prediction | -0.846308 | -0.075399 | -0.153347 | -0.515071 | -0.230750 | 0.171485 | -0.734513 | 1.000000 |
stats_ndvi_mask_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | -0.011410 | 0.288349 | 0.042121 | 0.505173 | -0.164520 | 0.976701 | -0.855418 |
aspect | -0.011410 | 1.000000 | 0.048912 | 0.308677 | -0.022923 | -0.088969 | -0.077550 | -0.150506 |
slope | 0.288349 | 0.048912 | 1.000000 | 0.028636 | 0.103908 | 0.005820 | 0.278088 | -0.255207 |
dah | 0.042121 | 0.308677 | 0.028636 | 1.000000 | -0.089621 | -0.106455 | -0.173273 | -0.553510 |
runoff_dates | 0.505173 | -0.022923 | 0.103908 | -0.089621 | 1.000000 | -0.138292 | 0.517224 | -0.374644 |
ripening_dates | -0.164520 | -0.088969 | 0.005820 | -0.106455 | -0.138292 | 1.000000 | -0.139310 | 0.192327 |
runoff_prediction | 0.976701 | -0.077550 | 0.278088 | -0.173273 | 0.517224 | -0.139310 | 1.000000 | -0.724337 |
ripening_prediction | -0.855418 | -0.150506 | -0.255207 | -0.553510 | -0.374644 | 0.192327 | -0.724337 | 1.000000 |
stats_glacier_mask_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | -0.045983 | 0.373325 | 0.041383 | 0.571092 | -0.128201 | 0.990514 | -0.840287 |
aspect | -0.045983 | 1.000000 | -0.022456 | 0.356095 | -0.041766 | -0.005894 | -0.094781 | -0.155613 |
slope | 0.373325 | -0.022456 | 1.000000 | -0.070506 | 0.188355 | -0.044186 | 0.381605 | -0.267060 |
dah | 0.041383 | 0.356095 | -0.070506 | 1.000000 | -0.055523 | -0.087948 | -0.096301 | -0.576451 |
runoff_dates | 0.571092 | -0.041766 | 0.188355 | -0.055523 | 1.000000 | -0.157640 | 0.576561 | -0.436930 |
ripening_dates | -0.128201 | -0.005894 | -0.044186 | -0.087948 | -0.157640 | 1.000000 | -0.115619 | 0.152568 |
runoff_prediction | 0.990514 | -0.094781 | 0.381605 | -0.096301 | 0.576561 | -0.115619 | 1.000000 | -0.757821 |
ripening_prediction | -0.840287 | -0.155613 | -0.267060 | -0.576451 | -0.436930 | 0.152568 | -0.757821 | 1.000000 |
The elevation correlation with runoff date increases from 0.31 in the unmasked scene, to 0.51 in the NDVI masked scene, to 0.57 in the glacier masked scene. Aspect controls / DAH seem very small. Should investigate this more.
# once we choose to use whichever masked data, now make coarser resolution copy high-medium-low res 20m 40m 100m 500m
# then analyze over smaller extent (entire,west,east)
rainier_sar_40m_ds = rainier_sar_ds.rio.reproject(dst_crs=rainier_sar_ds.crs,resolution=40)#.replace(_FillValue, np.nan)
clipped = rainier_sar_40m_ds.rio.clip(rainier_glaciers.geometry,rainier_glaciers.crs,drop=False,invert=False)
clipped = clipped.where(clipped<100000)
stats_glacier_mask_40m_df = s1_rtc_bs_utils.get_stats(clipped)
stats_glacier_mask_40m_ds = stats_glacier_mask_40m_df.to_xarray()
rainier_sar_100m_ds = rainier_sar_ds.rio.reproject(dst_crs=rainier_sar_ds.crs,resolution=100)
clipped = rainier_sar_100m_ds.rio.clip(rainier_glaciers.geometry,rainier_glaciers.crs,drop=False,invert=False)
clipped = clipped.where(clipped<100000)
stats_glacier_mask_100m_df = s1_rtc_bs_utils.get_stats(clipped)
stats_glacier_mask_100m_ds = stats_glacier_mask_100m_df.to_xarray()
rainier_sar_500m_ds = rainier_sar_ds.rio.reproject(dst_crs=rainier_sar_ds.crs,resolution=500)
clipped = rainier_sar_500m_ds.rio.clip(rainier_glaciers.geometry,rainier_glaciers.crs,drop=False,invert=False)
clipped = clipped.where(clipped<100000)
stats_glacier_mask_500m_df = s1_rtc_bs_utils.get_stats(clipped)
stats_glacier_mask_500m_ds = stats_glacier_mask_500m_df.to_xarray()
f,ax=plt.subplots(1,4,figsize=(40,8),sharex=True,sharey=True)
stats_glacier_mask_ds['runoff_dates'].plot(ax=ax[0],cmap='twilight',vmin=0,vmax=365)
stats_glacier_mask_40m_ds['runoff_dates'].plot(ax=ax[1],cmap='twilight',vmin=0,vmax=365)
stats_glacier_mask_100m_ds['runoff_dates'].plot(ax=ax[2],cmap='twilight',vmin=0,vmax=365)
stats_glacier_mask_500m_ds['runoff_dates'].plot(ax=ax[3],cmap='twilight',vmin=0,vmax=365)
ax[0].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nMt. Rainier Glacier Mask \n20m Resolution')
ax[1].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nMt. Rainier Glacier Mask \n40m Resolution')
ax[2].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nMt. Rainier Glacier Mask \n100m Resolution')
ax[3].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nMt. Rainier Glacier Mask \n500m Resolution')
Text(0.5, 1.0, 'Backscatter-derived Snowmelt Runoff Onset DOY \nMt. Rainier Glacier Mask \n500m Resolution')
f,ax=plt.subplots(2,4,figsize=(40,8),sharey=False,gridspec_kw={'height_ratios': [1, 6]})
V_full = get_semivariogram(stats_glacier_mask_df,'runoff_dates',sampling_frac=0.1);
V_full.plot(axes=ax[::-1,0]);
V_ndvi = get_semivariogram(stats_glacier_mask_40m_df,'runoff_dates',sampling_frac=0.1);
V_ndvi.plot(axes=ax[::-1,1]);
V_gm = get_semivariogram(stats_glacier_mask_100m_df,'runoff_dates',sampling_frac=0.1);
V_gm.plot(axes=ax[::-1,2]);
V_gm = get_semivariogram(stats_glacier_mask_500m_df,'runoff_dates',sampling_frac=1); # look at the dip at the end due to symmetry!
V_gm.plot(axes=ax[::-1,3]);
Interestingly, the range of the variogram progressively decreases from 6300m to 5000m as we coarsen our raster. The lowest resolution raster seems to have the largest variance dip at large spatial lags. I think this is still due to radial symmetry / topography which would be more prominent when upscaled (an important consideration to is resampling technique, here I just used nearest because of the [0,365] issue). Modeled sill values seem similar across the resolutions. The first (20m) fit variogram actually fits quite nicely with the experimental data. Due to the 500m raster having the highest range, I would think this would be best for decreasing spatial autocorrelation.
plot_morans_global_i(stats_glacier_mask_df,sampling_frac=1)
plot_morans_global_i(stats_glacier_mask_40m_df,sampling_frac=1)
plot_morans_global_i(stats_glacier_mask_100m_df,sampling_frac=1)
plot_morans_global_i(stats_glacier_mask_500m_df,sampling_frac=1)
Looks like the previous guess was correct, the 500m resolution raster indeed has the lowest Moran’s I global value. In fact, we again decrease our Moran’s Global I value monotonically as we coarsen our raster. For the first time so far, we can even seen the reference distribution for Moran’s I in the left hand plot for the 500m raster.
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_glacier_mask_df,sampling_frac=0.005) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
plt.tight_layout()
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_glacier_mask_40m_df,sampling_frac=0.005*4) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
plt.tight_layout()
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_glacier_mask_100m_df,sampling_frac=0.005*10) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
plt.tight_layout()
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_glacier_mask_500m_df,sampling_frac=1) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
plt.tight_layout()
It looks like resolution doesn’t control spatial autocorrelation hotspots all that much because Moran’s Local I values have significance in the same areas in all rasters. The resolution only seems to modulate the strength of the significance, not whether clusters are present or not.
stats_glacier_mask_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | -0.045983 | 0.373325 | 0.041383 | 0.571092 | -0.128201 | 0.990514 | -0.840287 |
aspect | -0.045983 | 1.000000 | -0.022456 | 0.356095 | -0.041766 | -0.005894 | -0.094781 | -0.155613 |
slope | 0.373325 | -0.022456 | 1.000000 | -0.070506 | 0.188355 | -0.044186 | 0.381605 | -0.267060 |
dah | 0.041383 | 0.356095 | -0.070506 | 1.000000 | -0.055523 | -0.087948 | -0.096301 | -0.576451 |
runoff_dates | 0.571092 | -0.041766 | 0.188355 | -0.055523 | 1.000000 | -0.157640 | 0.576561 | -0.436930 |
ripening_dates | -0.128201 | -0.005894 | -0.044186 | -0.087948 | -0.157640 | 1.000000 | -0.115619 | 0.152568 |
runoff_prediction | 0.990514 | -0.094781 | 0.381605 | -0.096301 | 0.576561 | -0.115619 | 1.000000 | -0.757821 |
ripening_prediction | -0.840287 | -0.155613 | -0.267060 | -0.576451 | -0.436930 | 0.152568 | -0.757821 | 1.000000 |
stats_glacier_mask_40m_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | -0.046475 | 0.374180 | 0.041902 | 0.572006 | -0.128733 | 0.990421 | -0.841141 |
aspect | -0.046475 | 1.000000 | -0.018726 | 0.353970 | -0.039627 | -0.008611 | -0.095218 | -0.153563 |
slope | 0.374180 | -0.018726 | 1.000000 | -0.072605 | 0.190531 | -0.048819 | 0.382797 | -0.266951 |
dah | 0.041902 | 0.353970 | -0.072605 | 1.000000 | -0.055707 | -0.088091 | -0.096455 | -0.575587 |
runoff_dates | 0.572006 | -0.039627 | 0.190531 | -0.055707 | 1.000000 | -0.159778 | 0.577538 | -0.438010 |
ripening_dates | -0.128733 | -0.008611 | -0.048819 | -0.088091 | -0.159778 | 1.000000 | -0.116071 | 0.153046 |
runoff_prediction | 0.990421 | -0.095218 | 0.382797 | -0.096455 | 0.577538 | -0.116071 | 1.000000 | -0.758409 |
ripening_prediction | -0.841141 | -0.153563 | -0.266951 | -0.575587 | -0.438010 | 0.153046 | -0.758409 | 1.000000 |
stats_glacier_mask_100m_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | -0.043463 | 0.371579 | 0.036828 | 0.583102 | -0.125618 | 0.990490 | -0.841429 |
aspect | -0.043463 | 1.000000 | -0.018994 | 0.363747 | -0.041339 | -0.001211 | -0.093349 | -0.160985 |
slope | 0.371579 | -0.018994 | 1.000000 | -0.068957 | 0.171923 | -0.039524 | 0.379423 | -0.267970 |
dah | 0.036828 | 0.363747 | -0.068957 | 1.000000 | -0.059466 | -0.085244 | -0.101012 | -0.570989 |
runoff_dates | 0.583102 | -0.041339 | 0.171923 | -0.059466 | 1.000000 | -0.154345 | 0.588700 | -0.446872 |
ripening_dates | -0.125618 | -0.001211 | -0.039524 | -0.085244 | -0.154345 | 1.000000 | -0.113324 | 0.149291 |
runoff_prediction | 0.990490 | -0.093349 | 0.379423 | -0.101012 | 0.588700 | -0.113324 | 1.000000 | -0.759082 |
ripening_prediction | -0.841429 | -0.160985 | -0.267970 | -0.570989 | -0.446872 | 0.149291 | -0.759082 | 1.000000 |
stats_glacier_mask_500m_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | -0.078896 | 0.366683 | 0.009799 | 0.566041 | -0.109605 | 0.995439 | -0.827517 |
aspect | -0.078896 | 1.000000 | -0.046622 | 0.366007 | 0.010181 | 0.037190 | -0.113527 | -0.140647 |
slope | 0.366683 | -0.046622 | 1.000000 | -0.022294 | 0.166222 | -0.083397 | 0.367481 | -0.288902 |
dah | 0.009799 | 0.366007 | -0.022294 | 1.000000 | -0.048696 | -0.075434 | -0.085637 | -0.569522 |
runoff_dates | 0.566041 | 0.010181 | 0.166222 | -0.048696 | 1.000000 | -0.089872 | 0.568635 | -0.437953 |
ripening_dates | -0.109605 | 0.037190 | -0.083397 | -0.075434 | -0.089872 | 1.000000 | -0.102012 | 0.132451 |
runoff_prediction | 0.995439 | -0.113527 | 0.367481 | -0.085637 | 0.568635 | -0.102012 | 1.000000 | -0.770184 |
ripening_prediction | -0.827517 | -0.140647 | -0.288902 | -0.569522 | -0.437953 | 0.132451 | -0.770184 | 1.000000 |
The correlations across the different resolutions are very similar! Can probably make some argument from the law of large numbers. So long as we are sampling from the same population and we’ve reached a critical sample size, etc.
emmons_glacier = rainier_glaciers[rainier_glaciers['Name'] == 'Emmons Glacier WA']
f,ax=plt.subplots(figsize=(10,10))
rainier_glaciers.plot(ax=ax)
emmons_glacier.plot(ax=ax,color='orange')
ax.set_title('Emmons Glacier')
Text(0.5, 1.0, 'Emmons Glacier')
stats_emmons_glacier_mask_df = s1_rtc_bs_utils.get_stats(rainier_sar_ds.rio.clip(emmons_glacier.geometry,emmons_glacier.crs,drop=False,invert=False),dem=dem,aspect=aspect,slope=slope,dah=dah)
stats_emmons_glacier_mask_ds = stats_emmons_glacier_mask_df.to_xarray()
clipped = rainier_sar_40m_ds.rio.clip(emmons_glacier.geometry,emmons_glacier.crs,drop=False,invert=False)
clipped = clipped.where(clipped<100000)
stats_emmons_glacier_mask_40m_df = s1_rtc_bs_utils.get_stats(clipped)
stats_emmons_glacier_mask_40m_ds = stats_emmons_glacier_mask_40m_df.to_xarray()
rainier_sar_100m_ds = rainier_sar_ds.rio.reproject(dst_crs=rainier_sar_ds.crs,resolution=100)
clipped = rainier_sar_100m_ds.rio.clip(emmons_glacier.geometry,emmons_glacier.crs,drop=False,invert=False)
clipped = clipped.where(clipped<100000)
stats_emmons_glacier_mask_100m_df = s1_rtc_bs_utils.get_stats(clipped)
stats_emmons_glacier_mask_100m_ds = stats_emmons_glacier_mask_100m_df.to_xarray()
f,ax=plt.subplots(1,3,figsize=(40,8),sharex=True,sharey=True)
stats_emmons_glacier_mask_ds['runoff_dates'].plot(ax=ax[0],cmap='twilight')
stats_emmons_glacier_mask_40m_ds['runoff_dates'].plot(ax=ax[1],cmap='twilight')
stats_emmons_glacier_mask_100m_ds['runoff_dates'].plot(ax=ax[2],cmap='twilight')
ax[0].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nEmmons Glacier \n20m Resolution')
ax[1].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nEmmons Glacier \n40m Resolution')
ax[2].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nEmmons Glacier \n100m Resolution')
Text(0.5, 1.0, 'Backscatter-derived Snowmelt Runoff Onset DOY \nEmmons Glacier \n100m Resolution')
f,ax=plt.subplots(2,3,figsize=(40,8),sharey=False,gridspec_kw={'height_ratios': [1, 6]})
V_full = get_semivariogram(stats_emmons_glacier_mask_df,'runoff_dates',sampling_frac=0.5);
V_full.plot(axes=ax[::-1,0]);
V_ndvi = get_semivariogram(stats_emmons_glacier_mask_40m_df,'runoff_dates',sampling_frac=0.5);
V_ndvi.plot(axes=ax[::-1,1]);
V_gm = get_semivariogram(stats_emmons_glacier_mask_100m_df,'runoff_dates',sampling_frac=0.5);
V_gm.plot(axes=ax[::-1,2]);
All three variograms look very similar. We again see an experimental variogram that looks exponential with no visible sill. This seems reasonable for a scene with a constant, unidirectional gradient. A directional variogram would be helpful for analysis to see in which directions spatial lag affects variance. There also seems to be a skew in the lag histogram, which makes sense because we no longer have a scene with multiple axis of symmetry. Let’s take a look at Moran’s Global I.
plot_morans_global_i(stats_emmons_glacier_mask_df,sampling_frac=1)
plot_morans_global_i(stats_emmons_glacier_mask_40m_df,sampling_frac=1)
plot_morans_global_i(stats_emmons_glacier_mask_100m_df,sampling_frac=1)
Once again, we decrease our Moran’s Global I value monotonically as we coarsen our raster. These values are still very high, which checks out because we can visibly see the clustering / gradient. The different spatial extent doesn’t seem to change the Global I values all too much. Maybe let’s take a look at the Local I’s once again.
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_emmons_glacier_mask_df,sampling_frac=0.03) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_emmons_glacier_mask_40m_df,sampling_frac=0.1) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_emmons_glacier_mask_100m_df,sampling_frac=1) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
#plt.tight_layout()
Moran’s Local I seems to spatially line up very well across the three resolutions. This shows that the same areas that are spatially autocorrelated across the different rasters, though the strength of the autocorrelation decreases slightly at lower resolution. Now let’s check out the correlations.
stats_emmons_glacier_mask_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | -0.103616 | 0.575041 | -0.270993 | 0.707038 | -0.296244 | 0.997465 | -0.982273 |
aspect | -0.103616 | 1.000000 | -0.009633 | 0.281883 | -0.066108 | 0.124756 | -0.122115 | 0.151206 |
slope | 0.575041 | -0.009633 | 1.000000 | -0.488151 | 0.410835 | -0.151071 | 0.598148 | -0.629564 |
dah | -0.270993 | 0.281883 | -0.488151 | 1.000000 | -0.240151 | 0.134700 | -0.338797 | 0.446631 |
runoff_dates | 0.707038 | -0.066108 | 0.410835 | -0.240151 | 1.000000 | -0.276656 | 0.708835 | -0.703959 |
ripening_dates | -0.296244 | 0.124756 | -0.151071 | 0.134700 | -0.276656 | 1.000000 | -0.299516 | 0.301590 |
runoff_prediction | 0.997465 | -0.122115 | 0.598148 | -0.338797 | 0.708835 | -0.299516 | 1.000000 | -0.993121 |
ripening_prediction | -0.982273 | 0.151206 | -0.629564 | 0.446631 | -0.703959 | 0.301590 | -0.993121 | 1.000000 |
stats_emmons_glacier_mask_40m_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | -0.114004 | 0.576249 | -0.270886 | 0.702656 | -0.304473 | 0.997983 | -0.982458 |
aspect | -0.114004 | 1.000000 | -0.015344 | 0.280172 | -0.052807 | 0.134337 | -0.130214 | 0.160298 |
slope | 0.576249 | -0.015344 | 1.000000 | -0.489214 | 0.411120 | -0.153562 | 0.597055 | -0.630674 |
dah | -0.270886 | 0.280172 | -0.489214 | 1.000000 | -0.233366 | 0.138110 | -0.331450 | 0.445646 |
runoff_dates | 0.702656 | -0.052807 | 0.411120 | -0.233366 | 1.000000 | -0.285185 | 0.704076 | -0.698665 |
ripening_dates | -0.304473 | 0.134337 | -0.153562 | 0.138110 | -0.285185 | 1.000000 | -0.307528 | 0.309910 |
runoff_prediction | 0.997983 | -0.130214 | 0.597055 | -0.331450 | 0.704076 | -0.307528 | 1.000000 | -0.992315 |
ripening_prediction | -0.982458 | 0.160298 | -0.630674 | 0.445646 | -0.698665 | 0.309910 | -0.992315 | 1.000000 |
stats_emmons_glacier_mask_100m_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | -0.138139 | 0.574409 | -0.290985 | 0.691804 | -0.273855 | 0.998767 | -0.950111 |
aspect | -0.138139 | 1.000000 | -0.036998 | 0.269082 | -0.109197 | 0.112076 | -0.149847 | 0.205869 |
slope | 0.574409 | -0.036998 | 1.000000 | -0.529358 | 0.410824 | -0.156521 | 0.592497 | -0.663841 |
dah | -0.290985 | 0.269082 | -0.529358 | 1.000000 | -0.234206 | 0.165701 | -0.338127 | 0.574882 |
runoff_dates | 0.691804 | -0.109197 | 0.410824 | -0.234206 | 1.000000 | -0.236210 | 0.692659 | -0.668018 |
ripening_dates | -0.273855 | 0.112076 | -0.156521 | 0.165701 | -0.236210 | 1.000000 | -0.277980 | 0.288234 |
runoff_prediction | 0.998767 | -0.149847 | 0.592497 | -0.338127 | 0.692659 | -0.277980 | 1.000000 | -0.964426 |
ripening_prediction | -0.950111 | 0.205869 | -0.663841 | 0.574882 | -0.668018 | 0.288234 | -0.964426 | 1.000000 |
The correlations across the different resolutions are very similar again! 0.71 is strong, that means for Emmons Glacier 50.04% of the variation in snowmelt timing date can be explained by elevation. Slope has a strong correlation too with 0.41.
f,ax=plt.subplots(1,3,figsize=(40,8),sharex=True,sharey=True)
stats_emmons_glacier_mask_ds['runoff_dates'].plot(ax=ax[0],cmap='twilight',vmin=0,vmax=365)
stats_emmons_glacier_mask_ds['runoff_prediction'].plot(ax=ax[1],cmap='twilight',vmin=0,vmax=365)
stats_emmons_glacier_mask_ds['residual'] = stats_emmons_glacier_mask_ds['runoff_dates']-stats_emmons_glacier_mask_ds['runoff_prediction']
stats_emmons_glacier_mask_ds['residual'].plot(ax=ax[2],cmap='RdBu',vmin=-100,vmax=100)
ax[0].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nEmmons Glacier \n20m Resolution')
ax[1].set_title('Backscatter-derived Snowmelt Runoff Onset DOY MLS Prediction \nEmmons Glacier \n20m Resolution')
ax[2].set_title('Backscatter-derived Snowmelt Runoff Onset DOY Residual \nEmmons Glacier \n20m Resolution')
Text(0.5, 1.0, 'Backscatter-derived Snowmelt Runoff Onset DOY Residual \nEmmons Glacier \n20m Resolution')
Can capture general trends well, more localized variability unaccounted for. Predicting on slope helps at the glacier scale, but not all that much. Could focus on areas without much elevation change to pull out other explanatory variables. Blue is later in the year than prediction, red is earlier than prediction for the residual plot.
rainier_sar_zoom_ds = rainier_sar_ds.rio.clip_box(minx=597000,miny=5.19E6,maxx=599000,maxy=5.192E6,)
stats_emmons_glacier_mask_zoom_df = s1_rtc_bs_utils.get_stats(rainier_sar_zoom_ds.rio.clip(emmons_glacier.geometry,emmons_glacier.crs,drop=False,invert=False))
stats_emmons_glacier_mask_zoom_ds = stats_emmons_glacier_mask_zoom_df.to_xarray()
f,ax=plt.subplots(figsize=(10,10))
emmons_glacier.plot(ax=ax)
stats_emmons_glacier_mask_zoom_ds['runoff_dates'].plot(ax=ax,add_colorbar=False,cmap='Wistia',vmin=0,vmax=0)
ax.set_xlim([595000, 601000])
ax.set_ylim([5.188E6, 5.1935E6])
ax.set_title('Emmons Glacier (Zoomed Area)')
Text(0.5, 1.0, 'Emmons Glacier (Zoomed Area)')
rainier_sar_40m_zoom_ds = rainier_sar_zoom_ds.rio.reproject(dst_crs=rainier_sar_ds.crs,resolution=40)
clipped = rainier_sar_40m_zoom_ds.rio.clip(emmons_glacier.geometry,emmons_glacier.crs,drop=False,invert=False)
clipped = clipped.where(clipped<100000)
stats_emmons_glacier_mask_40m_zoom_df = s1_rtc_bs_utils.get_stats(clipped)
stats_emmons_glacier_mask_40m_zoom_ds = stats_emmons_glacier_mask_40m_zoom_df.to_xarray()
rainier_sar_100m_zoom_ds = rainier_sar_zoom_ds.rio.reproject(dst_crs=rainier_sar_ds.crs,resolution=100)
clipped = rainier_sar_100m_zoom_ds.rio.clip(emmons_glacier.geometry,emmons_glacier.crs,drop=False,invert=False)
clipped = clipped.where(clipped<100000)
stats_emmons_glacier_mask_100m_zoom_df = s1_rtc_bs_utils.get_stats(clipped)
stats_emmons_glacier_mask_100m_zoom_ds = stats_emmons_glacier_mask_100m_zoom_df.to_xarray()
f,ax=plt.subplots(1,3,figsize=(40,8),sharex=True,sharey=True)
stats_emmons_glacier_mask_zoom_ds['runoff_dates'].plot(ax=ax[0],cmap='twilight',vmin=0,vmax=365)
stats_emmons_glacier_mask_40m_zoom_ds['runoff_dates'].plot(ax=ax[1],cmap='twilight',vmin=0,vmax=365)
stats_emmons_glacier_mask_100m_zoom_ds['runoff_dates'].plot(ax=ax[2],cmap='twilight',vmin=0,vmax=365)
ax[0].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nEmmons Glacier (Zoomed In) \n20m Resolution')
ax[1].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nEmmons Glacier (Zoomed In) \n40m Resolution')
ax[2].set_title('Backscatter-derived Snowmelt Runoff Onset DOY \nEmmons Glacier (zoomed In) \n100m Resolution')
Text(0.5, 1.0, 'Backscatter-derived Snowmelt Runoff Onset DOY \nEmmons Glacier (zoomed In) \n100m Resolution')
f,ax=plt.subplots(2,3,figsize=(40,8),sharey=False,gridspec_kw={'height_ratios': [1, 6]})
V_full = get_semivariogram(stats_emmons_glacier_mask_zoom_df,'runoff_dates',sampling_frac=0.5);
V_full.plot(axes=ax[::-1,0]);
V_ndvi = get_semivariogram(stats_emmons_glacier_mask_40m_zoom_df,'runoff_dates',sampling_frac=0.5);
V_ndvi.plot(axes=ax[::-1,1]);
V_gm = get_semivariogram(stats_emmons_glacier_mask_100m_zoom_df,'runoff_dates',sampling_frac=0.5);
V_gm.plot(axes=ax[::-1,2]);
All three variograms look somewhat similar. We again see behavior reasonable for a scene with a constant, unidirectional gradient. Let’s take a look at Moran’s Global I.
plot_morans_global_i(stats_emmons_glacier_mask_zoom_df,sampling_frac=1)
plot_morans_global_i(stats_emmons_glacier_mask_40m_zoom_df,sampling_frac=1)
plot_morans_global_i(stats_emmons_glacier_mask_100m_zoom_df,sampling_frac=1)
Once again, we dramatically decrease our Moran’s Global I value monotonically as we coarsen our raster. These values are still high even though we chose an area that didn’t seem to be strongly locally correlated in the previous Emmons Glacier analysis which is interesting. They are still much lower than those calculated on the entirety of Emmons though, so maybe it’s just a matter of perspective. Let’s take a look at the Local I’s once again.
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_emmons_glacier_mask_zoom_df,sampling_frac=0.1) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_emmons_glacier_mask_40m_zoom_df,sampling_frac=0.3) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_emmons_glacier_mask_100m_zoom_df,sampling_frac=1) #but now lets zoom in and look at clusters. where is this high autocorr coming from?
Once again, the same areas are significant across all the resolutions, though the lower resolutions have lower magnitudes of Moran’s I. Compared with the larger extents, the areas of significance still are in roughly the same area, though I’m not sure if I’m imagining it, but maybe a bit of added significance was added towards the fringes of the frame as we move to the exterior of Emmons. Let’s check out the correlations once again.
stats_emmons_glacier_mask_zoom_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | 0.093139 | 0.081654 | 0.034509 | 0.442471 | -0.148957 | 0.999921 | -0.999786 |
aspect | 0.093139 | 1.000000 | 0.055059 | 0.358466 | 0.053687 | 0.056835 | 0.097612 | -0.100465 |
slope | 0.081654 | 0.055059 | 1.000000 | -0.359954 | 0.008309 | 0.009725 | 0.077073 | -0.074135 |
dah | 0.034509 | 0.358466 | -0.359954 | 1.000000 | 0.020843 | -0.008218 | 0.047103 | -0.055155 |
runoff_dates | 0.442471 | 0.053687 | 0.008309 | 0.020843 | 1.000000 | -0.092209 | 0.442506 | -0.442492 |
ripening_dates | -0.148957 | 0.056835 | 0.009725 | -0.008218 | -0.092209 | 1.000000 | -0.148984 | 0.148989 |
runoff_prediction | 0.999921 | 0.097612 | 0.077073 | 0.047103 | 0.442506 | -0.148984 | 1.000000 | -0.999967 |
ripening_prediction | -0.999786 | -0.100465 | -0.074135 | -0.055155 | -0.442492 | 0.148989 | -0.999967 | 1.000000 |
stats_emmons_glacier_mask_40m_zoom_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | 0.095607 | 0.081620 | 0.031615 | 0.435908 | -0.143686 | 0.999462 | -0.988018 |
aspect | 0.095607 | 1.000000 | 0.095582 | 0.328801 | 0.083953 | 0.067972 | 0.106240 | -0.044156 |
slope | 0.081620 | 0.095582 | 1.000000 | -0.361646 | 0.012009 | 0.037212 | 0.069630 | -0.136884 |
dah | 0.031615 | 0.328801 | -0.361646 | 1.000000 | 0.028072 | 0.017891 | 0.064365 | 0.123024 |
runoff_dates | 0.435908 | 0.083953 | 0.012009 | 0.028072 | 1.000000 | -0.070851 | 0.436143 | -0.428478 |
ripening_dates | -0.143686 | 0.067972 | 0.037212 | 0.017891 | -0.070851 | 1.000000 | -0.142873 | 0.145428 |
runoff_prediction | 0.999462 | 0.106240 | 0.069630 | 0.064365 | 0.436143 | -0.142873 | 1.000000 | -0.982428 |
ripening_prediction | -0.988018 | -0.044156 | -0.136884 | 0.123024 | -0.428478 | 0.145428 | -0.982428 | 1.000000 |
stats_emmons_glacier_mask_100m_zoom_df.corr()
elevation | aspect | slope | dah | runoff_dates | ripening_dates | runoff_prediction | ripening_prediction | |
---|---|---|---|---|---|---|---|---|
elevation | 1.000000 | 0.129929 | 0.068839 | 0.054425 | 0.475028 | -0.112249 | 0.999999 | -0.956900 |
aspect | 0.129929 | 1.000000 | 0.098457 | 0.248949 | 0.120675 | -0.001075 | 0.129654 | -0.053980 |
slope | 0.068839 | 0.098457 | 1.000000 | -0.440548 | 0.066072 | 0.059252 | 0.069345 | -0.195094 |
dah | 0.054425 | 0.248949 | -0.440548 | 1.000000 | 0.025314 | 0.027908 | 0.053289 | 0.237907 |
runoff_dates | 0.475028 | 0.120675 | 0.066072 | 0.025314 | 1.000000 | -0.076608 | 0.475028 | -0.454711 |
ripening_dates | -0.112249 | -0.001075 | 0.059252 | 0.027908 | -0.076608 | 1.000000 | -0.112287 | 0.117304 |
runoff_prediction | 0.999999 | 0.129654 | 0.069345 | 0.053289 | 0.475028 | -0.112287 | 1.000000 | -0.957230 |
ripening_prediction | -0.956900 | -0.053980 | -0.195094 | 0.237907 | -0.454711 | 0.117304 | -0.957230 | 1.000000 |
The correlations across the different resolutions are very similar again, but much smaller because we chose a very localized section of the glacier without much variation in slope, elevation, or aspect.
plot_morans_global_i(stats_glacier_mask_40m_df,sampling_frac=0.001)
f,ax = plt.subplots(3,1,figsize=(7,15))
plot_morans_local_i(stats_glacier_mask_40m_df,sampling_frac=0.001)
Thinking about variograms and the fact that our rasters over Mt. Rainier have what I would call a limiting intraobject fundamental length. It seems variograms are good at capturing clusters, but what if the entire scene is one big circular cluster? Thinking about that exponential looking experimental variogram. Bit of a rabithole
It might be helpful for you to check my math on determining the weights matrix for Moran’s I. Is knn=8 reasonable? What's the norm?
Need help with the [0,365] problem :(
Will share upon request, this started as a qualifying exam question so I am not posting publically at the request of my qualifying exam committee.