This code shows a simple way to:
import xarray as xr
import cf_units as cf
import numpy as np
import pandas as pd
import esmlab
from ctsm_py import utils
import SVD_tools as svd
import segment as sg
import scipy # Try scipy to detrend
from scipy import signal
from scipy.stats import spearmanr
import warnings
# some resources for plotting
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.lines as mlines
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
%matplotlib inline
# suppress RuntimeWarnings that flag when code isn't written too efficiently
import warnings
warnings.simplefilter("ignore", category=RuntimeWarning)
# TODO, turn warnings back on with code below?
#warnings.resetwarnings()
variables = ['GPP','TBOT','SOILLIQ'] #TWS not available for CESM1 / CLM4
# --- CESM2 hist (1 ensemble member) ---
model = 'CESM2_hist1'
pattern = '/glade/collections/cdg/timeseries-cmip6/b.e21.BHIST.f09_g17.CMIP6-historical.001/' \
'lnd/proc/tseries/month_1/b.e21.BHIST.f09_g17.CMIP6-historical.001.clm2.h0.{var}.185001-201412.nc'
# --- CESM1-LENS hist (1 ensemble member) ---
#model = 'CESM1_LENS1'
#pattern = '/glade/collections/cdg/data/cesmLE/CESM-CAM5-BGC-LE/lnd/proc/tseries/monthly/' \
# '{var}/b.e11.B20TRC5CNBDRD.f09_g16.001.clm2.h0.{var}.185001-200512.nc'
# --- CLM5 w/ GSWP3 ---
#model = 'CLM5_GSWP3'
#pattern = '/glade/p/cgd/tss/people/oleson/CLM_LAND_ONLY_RELEASE/CLM5/clm50_r270_1deg_GSWP3V1_iso_newpopd_hist/'\
# 'lnd/proc/tseries/month_1/clm50_r270_1deg_GSWP3V1_iso_newpopd_hist.clm2.h0.{var}.185001-201412.nc'
# --- CLM45 w/ GSWP3 ---
#model ='CLM45_GSWP3'
#pattern = '/glade/p/cgd/tss/people/oleson/CLM_LAND_ONLY_RELEASE/CLM4.5/clm45_r270_1deg_GSWP3V1_hist/'\
# 'lnd/proc/tseries/month_1/clm45_r270_1deg_GSWP3V1_hist.clm2.h0.{var}.185001-201412.nc'
# --- CLM40 w/ GSWP3 ---
# NO TWS output!!
#model ='CLM4_GSWP3'
#pattern = '/glade/p/cgd/tss/people/oleson/CLM_LAND_ONLY_RELEASE/CLM4/clm40_r270_1deg_GSWP3v1_CMIP6_hist/'\
# 'lnd/proc/tseries/month_1/clm40_r270_1deg_GSWP3v1_CMIP6_hist.clm2.h0.{var}.185001-201412.nc'
# --- Danica's no crop run ---
#model = 'CLM5_GSWP3_noCrop'
#pattern = '/glade/p/cesm/lmwg_dev/dll/CLM5GSWP3_NoCrop/SingleVarTimeFiles/'\
# 'clm50_r267_1deg_GSWP3V1_iso_hist_nocrop_transientfix.clm2.h0.{var}.185001-201012.nc'
file = [pattern.format(var=var) for var in variables]
print(file[1])
# would be nice to just do this in a loop...
var = variables #redundant, but 'var' is used more later on...
ds0 = utils.time_set_mid(xr.open_dataset(file[0], decode_times=True), 'time')
ds1 = utils.time_set_mid(xr.open_dataset(file[1], decode_times=True), 'time')
ds2 = utils.time_set_mid(xr.open_dataset(file[2], decode_times=True), 'time')
if variables[2] == 'SOILLIQ':
dim_lev = ds2[var[2]].dims[1]
ds2 = ds2.sum(dim=dim_lev)
ds = xr.merge([ds0, ds1],compat='override')
ds[var[2]] = ds2[var[2]]
ds['TBOT'] = ds.TBOT - 273.15
area = ds.area
landfrac = ds.landfrac
/glade/collections/cdg/timeseries-cmip6/b.e21.BHIST.f09_g17.CMIP6-historical.001/lnd/proc/tseries/month_1/b.e21.BHIST.f09_g17.CMIP6-historical.001.clm2.h0.TBOT.185001-201412.nc
# JH suggestions, trouble shooting to see if this is why loop below fails...
# reading data this way gets rid of chunk sizes
# utils.time_set_mid corrects dates, to get Jan of first year
# combine needed for newer xarray versions
# ds = utils.time_set_mid(xr.open_mfdataset(files, combine='by_coords', decode_times=True), 'time')
years = 25
months = years * 12
tlat = 60 #60 #-5 #46 #-3
tlon = 240 #240 #300 #262 #280
ds_test = ds.sel(lat=tlat, lon=tlon, method='nearest') #select a single grid point to work with first
ds_last = ds_test.isel(time=slice(-months,None))
dataset = ds_last.get(var)
ds_ann = dataset.apply(utils.weighted_annual_mean)
plt.plot(ds_last.time['time'], ds_last.get(var[2]), '-' )
plt.plot(ds_ann.time['time'], ds_ann.get(var[2]), '-' );
# esmlab anomaly function needs time_bounds in the dataset!
# test to see if climatology and anomaly functions seem to work appropriately
# resample monthly data to seasonal
ds_clim = esmlab.core.climatology(ds_last,freq='mon', time_coord_name='time')
ds_anom = esmlab.core.anomaly(ds_last, clim_freq='mon', time_coord_name='time')
ds_detrend = ds_anom.get(var).map(signal.detrend) # still need to use get so detrend works
ds_detrend_season = ds_detrend.resample(time="QS-DEC").mean()
# now look at seasonal
plt.figure(figsize=[16,5])
for i in range(3):
plt.subplot(1, 3, (1+i))
plt.plot(ds_last.time['time.month'], ds_last.get(var[i]), 'o' )
plt.plot(ds_clim.time['time.month'], ds_clim.get(var[i]), '-', lw=3 )
plt.plot(ds_anom.time['time.month'], ds_anom.get(var[i]), 'o' )
plt.plot(ds_anom.time['time.month'], ds_detrend.get(var[i]), '*' )
plt.axhline(0, color='black', lw=1);
plt.ylabel(var[i]+", "+model+" lat= "+str(tlat))
plt.xlabel('month')
if i == 1:
plt.legend(labels=('raw','clim','anom','detrend_anom'),frameon = False)
plt.subplots_adjust(wspace=0.25)
fig = plt.figure(figsize=(12,4))
ax1 = fig.add_subplot(1,1,1)
color = 'tab:green'
ax1.set_xlabel('time')
ax1.set_ylabel(var[0]+" "+model, color=color)
ax1.plot(ds_detrend_season.time, ds_detrend_season.get(var[0]), '-', color=color)
ax1.tick_params(axis='y', labelcolor=color)
ax2 = ax1.twinx() # instantiate a second axes that shares the same x-axis
color = 'tab:red'
ax2.set_ylabel(var[1], color=color) # we already handled the x-label with ax1
ax2.plot(ds_detrend_season.time, ds_detrend_season.get(var[1]), '--', color=color)
ax2.tick_params(axis='y', labelcolor=color)
ax3 = ax1.twinx() # instantiate a second axes that shares the same x-axis
color = 'tab:blue'
ax3.set_ylabel(var[2], color=color) # we already handled the x-label with ax1
ax3.plot(ds_detrend_season.time, ds_detrend_season.get(var[2]), '-', color=color)
ax3.tick_params(axis='y', labelcolor=color)
plt.show();
plt.plot(ds_anom.time['time'], ds_anom.get(var[0]), '-',c='green' )
plt.plot(ds_anom.time['time'], ds_detrend.get(var[0]), '-' ,c='red')
plt.ylabel(var[0]);
# orig. mon x year
iavmatrix =np.zeros([3, 12, years])
for i in range(len(var)):
for iyr in range(years):
iavmatrix[i, 0:12, iyr]=ds_detrend.get(var[i])[iyr*12:(iyr+1)*12] #reshape timeseries vector into a matrix (year x month)
plt.figure(figsize=[16,5])
for i in range(3):
plt.subplot(1, 3, (1+i))
plt.ylabel(var[i]+' detrended anomalies, all years')
plt.plot(iavmatrix[i,:,:]);
plt.subplots_adjust(wspace=0.25)
This code was provided by Gretchen Keppel-Aleks & initially developed by Zach Butterfield
Further modified by W. Wieder to accept matrix when years > 12 (nmonths)
#assign the 2-D matrix (month x year) to decompose
#Call the function to calculate the singular vectors and their annual weights
vectors, weights=svd.decompose(iavmatrix[0,:,:])
theta ,varfrac =svd.calc_redistribution(vectors , weights , iavmatrix[0,:,:])
print(vectors.shape) #(nyears , nmonths)
print(weights.shape)
#print(ds_detrend_season) #(101 time slices, needs to be reshaped to nyears*season)
# ----- run all stats on hydrologic year in SH -----
mm2plot=ds_clim.get(var[0])
idx = [6,7,8,9,10,11,0,1,2,3,4,5]
if tlat < 0:
mm2plot = mm2plot[idx]
for i in range(2): # just focus on first two vectors
vectors[i,:] = vectors[i,:][idx]
(25, 12) (25, 25)
# also try spearman's rank correlation
s = np.zeros(years)
for i in range(years):
s[i] = spearmanr(mm2plot,vectors[i,:])[0]
print('Spearmans correlation SV1 vectors ~ Climatology: %.3f' % s[0])
print('Spearmans correlation SV2 vectors ~ Climatology: %.3f' % s[1])
# also try weighting correlation based on monthly contribution to annual flux
wgt = mm2plot/mm2plot.sum()
def cov(x, y, w):
    """Weighted covariance of x and y, using weights w."""
    mean_x = np.average(x, weights=w)
    mean_y = np.average(y, weights=w)
    return np.sum(w * (x - mean_x) * (y - mean_y)) / np.sum(w)
def corr(x, y, w):
    """Weighted Pearson correlation of x and y, using weights w."""
    normalizer = np.sqrt(cov(x, x, w) * cov(y, y, w))
    return cov(x, y, w) / normalizer
r = np.zeros(years)
for i in range(years):
r[i] = corr(mm2plot.values,vectors[i,:], wgt.values)
print('weighted correlation SV1 vectors ~ Climatology: %.3f' % r[0])
print('weighted correlation SV2 vectors ~ Climatology: %.3f' % r[1])
## for our generic tropical plot, SV1 = early shift & SV2 = amplification
Spearmans correlation SV1 vectors ~ Climatology: 0.133 Spearmans correlation SV2 vectors ~ Climatology: 0.762 weighted correlation SV1 vectors ~ Climatology: -0.241 weighted correlation SV2 vectors ~ Climatology: 0.944
## add logicals to flip vectors and weights if:
# 1a) amplification vector and correlation with climatology < 0
# 2) phase shift vector and minVector comes before maxVector
# 1b) amplification vector and sum(vectors)<0 [this corrects issues in the tropics]
flip_corr = s<0 # here using spearmans
min_vector = np.argmin(vectors, axis=1)
max_vector = np.argmax(vectors, axis=1)
flip_vector = min_vector < max_vector
print(flip_vector)
# only flip corr where amplification, vector where phase
if np.absolute(s[0])<np.absolute(s[1]):
flip_corr[0] = False
flip_vector[1] = False
else:
flip_corr[1] = False
flip_vector[0] = False
flip_vector = flip_corr + flip_vector
print(flip_vector)
for i in range(years):
if flip_vector[i] == True:
vectors[i,:] = -vectors[i,:]
weights[i,:] = -weights[i,:]
[False True True False False False True False False True True True False False False False False False False False False False False False False] [False False True False True True True True False True True True False False False False False False False False False False False False False]
flip_amp = np.mean(vectors, axis=1) < 0.
if np.absolute(s[0])<np.absolute(s[1]):
flip_amp[0] = False
print(flip_amp)
for i in range(years):
if flip_amp[i] == True:
vectors[i,:] = -vectors[i,:]
weights[i,:] = -weights[i,:]
[False False True True False True False True True False False True False False False False False False False False False False False False False]
# Plot First and Second Singular Vectors
# TODO This is a pretty complicated plotting method that could likely be improved & made into a function
# That said, it makes nice, clear plots... we'll keep it for now
fig = plt.figure(4,figsize=(3,4.5))
ax1 = fig.add_subplot(111)
month = np.arange(12)
barwidth = 0.2
ymax=np.max(vectors)
vectors2plot=vectors*7000 #This is a totally arbitrary scale factor,
# decided by me, to make figures look good!
ax1.plot(np.array(month)+2*barwidth,mm2plot/(4*max(mm2plot))-0.05,'darkgray',linewidth=3)
ax1.plot(np.array(month)+2*barwidth,mm2plot/(4*max(mm2plot))+0.35,'darkgray',linewidth=3)
ax1.plot(np.array(month)+2*barwidth,vectors2plot[0,:]*0.75+0.35,'r-',linewidth=3)
ax1.plot(np.array(month)+2*barwidth,vectors2plot[1,:]*0.75-0.05,'b-',linewidth=3)
ax1.axhline(-0.05,color='black')
ax1.axhline(0.35,color='black')
ax1.set_xticks(np.array(month)+2*barwidth)
ax1.set_xticklabels(['Jan','','','Apr','','','Jul','','','Oct','',''])
ax1.set_yticks([-0.05,0.35])
ax1.set_yticklabels(['SV2','SV1'])
ax1.set_ylim([-0.2,0.65])
ax1.text(0.5,.50,str(int(varfrac[0]*100))+"%\n$\Theta$="+str('%.2f'%theta[0])+
"\n"+str('rank cor %.2f' % s[0]))
ax1.text(0.5,.10,str(int(varfrac[1]*100))+"%\n$\Theta$="+str('%.2f'%theta[1])+
"\n"+str('rank cor %.2f' % s[1]))
ax1.title.set_text(model +' '+var[0])
ax1.text(4,-0.29,'Month')
plt.tight_layout()
plt.savefig('SVD_'+model+'_ex_'+var[0]+'.eps') ;
# TODO, there must be a better way to reshape data, but I can't get group by to work
season_group = ds_detrend_season.groupby('time.month')
#reshape timeseries vector into a matrix (season x year)
# Omits the last year, which only has data for Dec
group_0 =np.zeros([4, years])
group_1 =np.zeros([4, years])
group_2 =np.zeros([4, years])
# Reshape timeseries vector into a matrix (season x year)
# this could also be done with np.reshape...
for iyr in range(years):
group_0[0:4,iyr]=ds_detrend_season.get(var[0])[(iyr*4):(iyr*4)+4]
group_1[0:4,iyr]=ds_detrend_season.get(var[1])[(iyr*4):(iyr*4)+4]
group_2[0:4,iyr]=ds_detrend_season.get(var[2])[(iyr*4):(iyr*4)+4]
# Plot seasonal anomalies by year
plt.plot(group_0[0,], '-',color='green')#DJF
plt.plot(group_0[1,], '-',color='red' ) #MAM
plt.plot(group_0[2,], '-',color='blue' ) #JJA
plt.plot(group_0[3,], '-',color='black' ); #SON
# --- Correlations with climate anomalies ---
print('correlation of sv weights & with seasonal anomalies')
quarter = (['DJF','MAM','JJA','SON']) # can coords be a string?
# Here using weights that were flipped like the vectors
for h in range(2):# vectors
print(" -------- SV"+str(h+1)+" -----------")
for i in range(4):# season
r0 = corr(weights[h,:],group_0[i,:],np.ones(years))
r1 = corr(weights[h,:],group_1[i,:],np.ones(years))
r2 = corr(weights[h,:],group_2[i,:],np.ones(years))
print(var[0]+' weights SV ~ '+var[0]+' '+quarter[i]+' anom. : %.3f' % r0)
print(var[0]+' weights SV ~ '+var[1]+' '+quarter[i]+' anom. : %.3f' % r1)
print(var[0]+' weights SV ~ '+var[2]+' '+quarter[i]+' anom. : %.3f' % r2)
print('--')
# once we arrange the vectors correctly
# phase shift (SV1), + correlated with MAM TBOT anomalies, - correlated with SON TWS
# amplification(SV2), + correlated with JJA TWS anomalies
# this pattern of Phase ~ TBOT & Amp ~ TWS holds for CLM4.5 too
correlation of sv weights & with seasonal anomalies -------- SV1 ----------- GPP weights SV ~ GPP DJF anom. : 0.075 GPP weights SV ~ TBOT DJF anom. : 0.719 GPP weights SV ~ SOILLIQ DJF anom. : 0.176 -- GPP weights SV ~ GPP MAM anom. : 0.990 GPP weights SV ~ TBOT MAM anom. : 0.846 GPP weights SV ~ SOILLIQ MAM anom. : 0.754 -- GPP weights SV ~ GPP JJA anom. : 0.119 GPP weights SV ~ TBOT JJA anom. : 0.276 GPP weights SV ~ SOILLIQ JJA anom. : 0.165 -- GPP weights SV ~ GPP SON anom. : -0.272 GPP weights SV ~ TBOT SON anom. : 0.023 GPP weights SV ~ SOILLIQ SON anom. : 0.169 -- -------- SV2 ----------- GPP weights SV ~ GPP DJF anom. : -0.178 GPP weights SV ~ TBOT DJF anom. : -0.096 GPP weights SV ~ SOILLIQ DJF anom. : -0.266 -- GPP weights SV ~ GPP MAM anom. : -0.010 GPP weights SV ~ TBOT MAM anom. : -0.068 GPP weights SV ~ SOILLIQ MAM anom. : -0.203 -- GPP weights SV ~ GPP JJA anom. : 0.911 GPP weights SV ~ TBOT JJA anom. : 0.398 GPP weights SV ~ SOILLIQ JJA anom. : -0.351 -- GPP weights SV ~ GPP SON anom. : 0.315 GPP weights SV ~ TBOT SON anom. : 0.052 GPP weights SV ~ SOILLIQ SON anom. : -0.440 --
# in the NH gridcell,
# weights for SV1 of GPP = the inverse as MAM GPP anomalies
# weights for SV2 of GPP = the inverse as JJA GPP anomalies
# in SH
# SV1 weights - corr w/ GPP & TWS in JJA and SON
# SV2 weights + corr w/ GPP in MAM, not climate
s2y = 3600 * 24 * 365
if tlat>=0:
plt.plot(group_0[1,:]*s2y,(weights[0,:]), '*', color='red' )
plt.plot(group_0[2,:]*s2y,(weights[1,:]), '*', color='blue' )
plt.legend(loc='upper right',labels=('SV1 weights ~ MAM '+var[0]+' anomalies',
'SV2 weights ~ JJA '+var[0]+' anomalies'))
else:
plt.plot(group_0[3,:]*s2y,(weights[0,:]), '*', color='red' )
plt.plot(group_0[1,:]*s2y,(weights[1,:]), '*', color='blue' )
plt.legend(loc='upper right',labels=('SV1 weights ~ SON '+var[0]+' anomalies',
'SV2 weights ~ MAM '+var[0]+' anomalies'))
plt.xlabel(var[0]+" seasonal anomalies (gC/m2/y)")
plt.ylabel(var[0]+" SV weights");
# let's reverse weights for clarity (vectors are reversed above)
fig = plt.figure(figsize=(8,4))
ax1 = fig.add_subplot(1,1,1)
color = 'red'
if tlat>=0:
x = group_1[1,:] #MAM TBOT
xlab = 'MAM '+var[1]+' anomalies'
else:
x = group_2[3,:] #SON TWS
xlab = 'SON '+var[2]+' anomalies'
y = weights[0,:] #SV1
ax1.set_ylabel(var[0]+' SV weights')
ax1.set_xlabel(xlab)
ax1.plot(x, y,'*', color=color)
ax1.plot(np.unique(x), np.poly1d(np.polyfit(x, y, 1))(np.unique(x)), color=color)
red_line = mlines.Line2D([], [], color='red', marker='*',label='SV1 weights ~ '+xlab)
ax1.legend(handles=[red_line], loc='lower right')
ax2 = ax1.twiny() # instantiate a second axes that shares the same x-axis
color = 'blue'
if tlat>=0:
x = group_2[2,:] #JJA TWS
xlab = 'JJA '+var[2]+' anomalies'
else:
x = group_2[1,:] #MAM TWS
xlab = 'MAM '+var[2]+' anomalies'
y = weights[1,:] #SV2
ax2.set_xlabel(xlab)
ax2.plot(x, y,'*', color=color)
ax2.plot(np.unique(x), np.poly1d(np.polyfit(x, y, 1))(np.unique(x)), color=color)
ax2.tick_params(axis='x')
blue_line = mlines.Line2D([], [], color='blue', marker='*',label='SV2 weights ~ '+xlab)
ax2.legend(handles=[blue_line], loc='upper left')
fig.tight_layout();
# Use esmlab to calculate climatology & anomalies
# TODO, refer to variables by name (e.g. anom_gpp) to improve readability
# Will leave this for now to have a more generalizable code
ds2 = ds.isel(time=slice(-months,None)) # Select last N years of data
ds_clim = esmlab.core.climatology(ds2,freq='mon') # Calculate climatology
ds_anom = esmlab.core.anomaly(ds2, clim_freq='mon', time_coord_name='time') #not sure how to use slice_mon_clim_time
ds_anom = ds_anom.where(ds_anom.get(var[0]).max(dim='time')) # mask out regions with no GPP for all variables
ds_climSD = ds2.groupby('time.month').std('time')
ds_clim2 = ds2.groupby('time.month').mean('time')
#ds_climSD = ds_climSD.rename({'month':'time'})
ds_clim2 = ds_clim2.rename({'month':'time'})
# check that esmlab and groupby approaches give the same results
# first need to rename 'month' & align time
#ds_climSD['time'] = ds_clim.time
ds_clim2['time'] = ds_clim.time
ds_clim, ds_clim2 = xr.align(ds_clim, ds_clim2, join='override')
ds_cimDiff = ds_clim.GPP - ds_clim2.GPP
print(ds_cimDiff.max())
<xarray.DataArray 'GPP' ()> array(0.) Attributes: long_name: gross primary production units: gC/m^2/s cell_methods: time: mean
# define function to calculate svd vectors, weights, etc.
# First we have to detrend the data
def svd_wrap(anom, years=years):
    """Detrend one gridcell's monthly anomaly series and run the SVD decomposition.

    Parameters
    ----------
    anom : 1-D array of monthly anomalies, length years*12 (may be all-NaN for
        masked cells -- only the first element is checked, matching the caller).
    years : int, number of years; NOTE the default binds the module-level
        `years` at definition time.

    Returns
    -------
    theta, varfrac : (years,) arrays, NaN when the cell is masked.
    vectors : (years, 12) array of singular vectors (NaN when masked).
    weights : (years, years) array of annual weights (NaN when masked).
    """
    theta = np.full(years, np.nan)
    varfrac = np.full(years, np.nan)
    # BUGFIX: previously vectors/weights were only assigned inside the `if`
    # branch, so a NaN-leading input raised UnboundLocalError at the return.
    vectors = np.full((years, 12), np.nan)
    weights = np.full((years, years), np.nan)
    if not np.isnan(anom[0]):
        # detrend results, if needed
        detrend = signal.detrend(anom)
        # reshape timeseries vector into a matrix (months x year)
        matrix = np.zeros([12, years])
        for iyr in range(years):
            matrix[0:12, iyr] = detrend[iyr*12:(iyr+1)*12]
        # Call the function to calculate the singular vectors and their annual weights
        vectors, weights = svd.decompose(matrix)
        theta, varfrac = svd.calc_redistribution(vectors, weights, matrix)
    return theta, varfrac, vectors, weights
## Select a single grid point to work with first
## Make sure wrapper results are the same as above
#[theta_wrap, varfrac_wrap, vectors_wrap, weights_wrap] = svd_wrap(ds_temp.get(var[0]).values, years)
#plt.plot(theta_wrap - theta, '*') ;
#plt.plot(varfrac_wrap - varfrac, '*',c='r') ;
#print ('differences between results look pretty small')
# TODO, this is the time consuming bit of the code.
# JH recommends using map_blocks, once it's developed farther
def calc_svd(da, nyears=years):
    """Run svd_wrap for every unmasked gridcell of `da` and collect the results.

    Parameters
    ----------
    da : DataArray with 'time', 'lat', 'lon' dims (time fastest-varying is not
        required; shapes are read from da.dims/da.shape).
        Cells that are NaN at time=0 are skipped (assumed ocean/masked --
        TODO confirm the mask is time-invariant).
    nyears : int, number of years = number of singular vectors; the default
        binds the module-level `years` at definition time.

    Returns
    -------
    xr.Dataset with per-gridcell 'theta' and 'varfrac' (lat, lon, sv),
    'vectors' (lat, lon, sv, time=12 months) and
    'weights' (lat, lon, sv, year).
    """
    dims = dict(zip(da.dims, da.shape))
    yx_shape = (dims['lat'], dims['lon'])
    # setup output variables
    # number of vectors in svd = nyears
    theta_shape = yx_shape + (nyears, )
    vector_shape = yx_shape + (nyears, 12)
    weights_shape = yx_shape + (nyears, nyears)
    out = xr.Dataset( coords={"lat": da.lat, "lon": da.lon, 'sv': range(nyears) } )
    varnames = ['theta', 'varfrac', 'vectors', 'weights']
    out['theta'] = xr.DataArray(np.zeros(theta_shape), dims=('lat', 'lon', 'sv'))
    out['varfrac'] = xr.DataArray(np.zeros(theta_shape), dims=('lat', 'lon', 'sv'))
    out['vectors'] = xr.DataArray(np.zeros(vector_shape), dims=('lat', 'lon', 'sv', 'time'))
    out['weights'] = xr.DataArray(np.zeros(weights_shape), dims=('lat', 'lon', 'sv', 'year'))
    # For clarity, rename dimensions of weights (dim1 = vector#, dim2 = year)
    # vectors (dim1 = vector#, dim2 = month)
    # Loop over every (lat, lon) index; `mask` is True where the first time
    # slice is NaN, so svd_wrap only runs on valid cells.
    for (i, j), mask in np.ndenumerate(da.isel(time=0).isnull()):
        if not mask:
            # varnames order matches svd_wrap's return order
            return_vals = svd_wrap(da.isel(lat=i, lon=j).data)
            for varname, vals in zip(varnames, return_vals):
                out[varname][i, j, ...] = vals
    return out
%%time
# TODO, again more descriptive names will make the code easier to read, but for now keeping general
# e.g. svd_gpp = calc_svd(ds_anom['GPP'])
da0 = ds_anom.get(var[0]).transpose('time','lat','lon')
svd0 = calc_svd(da0)
print('finished svd0, '+var[0])
# JH, When I loaded the data as you suggested in cell #2-3, a bunch of warnings crop up.
# Reading the data less efficiently doesn't generate the errors (as currently implemented),
# and also seems to make the loop work more efficiently.
finished svd0, GPP CPU times: user 2min 2s, sys: 499 ms, total: 2min 2s Wall time: 2min 2s
# rearrange output & write out to scratch
def transpose(da_in, var):
    """Reorder SVD output dims to (sv, [time|year], lat, lon) and attach climatology.

    Parameters
    ----------
    da_in : Dataset returned by calc_svd.
        NOTE(review): `out = da_in` is not a copy, so da_in is modified in
        place -- confirm callers expect that.
    var : str, variable name used to pull the climatology from the global
        ds_clim (and in the commented-out netCDF path).

    Returns
    -------
    The same Dataset, with transposed variables and a 'climatology' field.
    """
    out = da_in
    out['theta'] = out.theta.transpose('sv','lat','lon')
    out['varfrac'] = out.varfrac.transpose('sv','lat','lon')
    out['vectors'] = out.vectors.transpose('sv', 'time','lat','lon')
    out['weights'] = out.weights.transpose('sv', 'year','lat','lon')
    # NOTE(review): overwrites the time coordinate of the *global* ds_clim so
    # it aligns with out.time -- side effect persists after this call.
    ds_clim['time'] = out.time # get time coords to match
    out['climatology'] = ds_clim.get(var)
    #out.to_netcdf(path='/glade/scratch/wwieder/svd/'+model+'_svd_'+var+'.nc')
    return out
#this could also be more specific for readability, using 'GPP' instead of var[0]
svd0 = transpose(svd0, var[0])
svdSH = svd0.copy(deep=True)
svdSH = svdSH.reindex(time=idx)
svdSH['time'] = svd0.time
svd0['climatology'] = svd0.climatology.where(svd0.lat>=0, svdSH.climatology)
svd0['vectors'] = svd0.vectors.where(svd0.lat>=0, svdSH.vectors)
svd0.climatology.sel(lat=-5, lon=295, method='nearest').plot()
svd0.vectors.isel(sv=0).sel(lat=-5, lon=295, method='nearest').plot(color='r')
svd0.vectors.isel(sv=1).sel(lat=-5, lon=295, method='nearest').plot(color='b')
[<matplotlib.lines.Line2D at 0x2ba6c95b4748>]
Example from http://xarray.pydata.org/en/stable/dask.html
import bottleneck
# also try weighting correlation based on monthly contribution to annual flux
# This is code that's likely better suited for a 3rd partly library like esmlab
# TODO, file issue with requst for these kinds of statistical functions to esmlab?
def covariance_gufunc(x, y):
    """Covariance of x and y along the trailing axis (population normalization)."""
    x_dev = x - x.mean(axis=-1, keepdims=True)
    y_dev = y - y.mean(axis=-1, keepdims=True)
    return (x_dev * y_dev).mean(axis=-1)
def pearson_correlation_gufunc(x, y):
    """Pearson correlation of x and y along the trailing axis."""
    denom = x.std(axis=-1) * y.std(axis=-1)
    return covariance_gufunc(x, y) / denom
def pearson_correlation(x, y, dim):
    """Pearson correlation of two DataArrays over dimension `dim` (dask-aware)."""
    return xr.apply_ufunc(
        pearson_correlation_gufunc, x, y,
        input_core_dims=[[dim], [dim]],
        dask='parallelized',
        output_dtypes=[float])
# Weighted correlation
def covariance_gufunc_wgt(x, y, w):
    """Weighted covariance of x and y along the trailing axis.

    BUGFIX: the original computed ``(w*x - mean(x*w)) * (y - mean(y*w))``,
    which is not a valid weighted covariance (the weight multiplied x but not
    y) and was inconsistent with the scalar `cov` helper earlier in this file.
    This version implements sum(w*(x-mx)*(y-my))/sum(w) with weighted means.
    """
    w_sum = w.sum(axis=-1, keepdims=True)
    x_mean = (w * x).sum(axis=-1, keepdims=True) / w_sum
    y_mean = (w * y).sum(axis=-1, keepdims=True) / w_sum
    return (w * (x - x_mean) * (y - y_mean)).sum(axis=-1) / w.sum(axis=-1)
def pearson_correlation_gufunc_wgt(x, y, w):
    """Weighted Pearson correlation of x and y along the trailing axis."""
    return covariance_gufunc_wgt(x, y, w) / np.sqrt(
        covariance_gufunc_wgt(x, x, w) * covariance_gufunc_wgt(y, y, w))
def pearson_correlation_wgt(x, y, w, dim):
    """Weighted Pearson correlation of two DataArrays over dimension `dim` (dask-aware)."""
    return xr.apply_ufunc(
        pearson_correlation_gufunc_wgt, x, y, w,
        input_core_dims=[[dim], [dim], [dim]],
        dask='parallelized',
        output_dtypes=[float])
# rank correlation
def spearman_correlation_gufunc(x, y):
    """Spearman rank correlation of x and y along the trailing axis.

    Uses scipy.stats.rankdata (scipy is already a dependency of this notebook)
    instead of bottleneck, dropping the extra third-party requirement; ties
    receive average ranks in both implementations.
    """
    from scipy.stats import rankdata  # local import keeps the function self-contained
    x_ranks = rankdata(x, axis=-1)
    y_ranks = rankdata(y, axis=-1)
    return pearson_correlation_gufunc(x_ranks, y_ranks)
def spearman_correlation(x, y, dim):
    """Spearman correlation of two DataArrays over dimension `dim` (dask-aware)."""
    return xr.apply_ufunc(
        spearman_correlation_gufunc, x, y,
        input_core_dims=[[dim], [dim]],
        dask='parallelized',
        output_dtypes=[float])
# Pull off climatology of GPP & calculate weights based on annual cycle
print(svd0.time)
GPPwgt = svd0.climatology / svd0.climatology.sum(dim='time')
svd0.climatology.sel(lat=tlat, lon=tlon, method='nearest').plot()
svd0.vectors.isel(sv=0).sel(lat=tlat, lon=tlon, method='nearest').plot()
svd0.vectors.isel(sv=1).sel(lat=tlat, lon=tlon, method='nearest').plot()
#GPPwgt.isel(time=0).plot(robust=True);
<xarray.DataArray 'time' (time: 12)> array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) Coordinates: * time (time) int64 0 1 2 3 4 5 6 7 8 9 10 11
[<matplotlib.lines.Line2D at 0x2ba6cb225048>]
# now calculate the correlations, GPP vectors w/ GPP climatology
# pearson, weighted, and ranked correlations
dims = dict(zip(ds[var[0]].dims, ds[var[0]].shape))
yx_shape = (dims['lat'], dims['lon'])
# setup output variables
# number of vectors in svd = nyears
corr_shape = (years, ) + yx_shape
GPP_corr = xr.Dataset( coords={'sv': range(years) , "lat": ds.lat, "lon": ds.lon} )
GPP_corr['pearson'] = xr.DataArray(np.zeros(corr_shape), dims=('sv','lat', 'lon'))
GPP_corr['weighted'] = xr.DataArray(np.zeros(corr_shape), dims=('sv','lat', 'lon'))
GPP_corr['ranked'] = xr.DataArray(np.zeros(corr_shape), dims=('sv','lat', 'lon'))
for i in range(years):
temp_vector = svd0.vectors.isel(sv=i)
GPP_corr.pearson[i,...] = pearson_correlation(svd0.climatology, temp_vector, 'time')
GPP_corr.weighted[i,...] = pearson_correlation_wgt(svd0.climatology, temp_vector, GPPwgt,'time')
GPP_corr.ranked[i,...] = spearman_correlation(svd0.climatology, temp_vector, 'time')
GPP_corr.weighted[0,:,:].plot(robust=True);
# Generate a function for making panel plots of maps
def map_function(da, cb=0, cmap=None, ax=None, title=None, vmax=None, vmin=None):
    '''Draw one global map panel of DataArray `da` on a cartopy axis.

    Parameters
    ----------
    da : DataArray with lat/lon coords and 2-D values.
    cb : 1 to attach a colorbar to this panel (allows a different colorbar per plot).
    cmap, vmax, vmin : passed through to pcolormesh.
    ax : target axes (defaults to the current axes).
    title : panel title.
    '''
    if ax is None:
        ax = plt.gca()
    #da.plot(ax=ax) # more custom args
    im = ax.pcolormesh(da.lon, da.lat, da.values,
                       transform=ccrs.PlateCarree(),
                       vmax=vmax, vmin=vmin, cmap=cmap)
    ax.set_title(title)
    ax.coastlines()
    ax.set_extent([-180, 180, -65, 80], crs=ccrs.PlateCarree())
    # allows for different colorbars on each plot
    if cb == 1:
        # BUGFIX: use the figure that owns `ax` rather than the notebook-global
        # `fig`, which only worked when a matching `fig` existed at call time
        ax.figure.colorbar(im, ax=ax, shrink=0.40, pad=0, fraction=0.1)
# Now call the map_function
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14,6), constrained_layout=True,
subplot_kw=dict(projection=ccrs.Robinson()))
for index, ax in np.ndenumerate(axes):
# there are various ways to do this part, index in this case is a tuple (ie `(0, 0)`)
da = svd0.varfrac.isel(sv=index[0])
map_function(da, ax=ax, title='SV'+str(index[0])+' fraction of variance',
vmax=da.max(), vmin=da.min(), cmap='gist_earth_r',cb=1)
# or to provide a common color bar, set vmax/min so same values and
#fig.colorbar(im,ax=axes.ravel().tolist(), shrink=0.5);
plt.savefig('/glade/scratch/wwieder/svd/'+model+'_'+var[0]+'_sv_Variance.eps')
# Mask values to isolate regions with amplification vs. phase shift (early green-up)?
# here using rank correlation, but also could use weighted
mask_shape = (2, ) + yx_shape
sv_mask = xr.Dataset( coords={'sv_type': ['amplification','phase shift'] , "lat": ds.lat, "lon": ds.lon} )
sv_mask['ranked'] = xr.DataArray(np.zeros(mask_shape), dims=('sv_type','lat', 'lon'))
sv_mask['ranked'][0,...] = xr.ufuncs.fabs(GPP_corr.ranked[0,...]) >= \
xr.ufuncs.fabs(GPP_corr.ranked[1,...])
sv_mask['ranked'][1,...] = xr.ufuncs.fabs(GPP_corr.ranked[1,...]) > \
xr.ufuncs.fabs(GPP_corr.ranked[0,...])
sv_mask['weighted'] = xr.DataArray(np.zeros(mask_shape), dims=('sv_type','lat', 'lon'))
sv_mask['weighted'][0,...] = xr.ufuncs.fabs(GPP_corr.weighted[0,...]) >= \
xr.ufuncs.fabs(GPP_corr.weighted[1,...])
sv_mask['weighted'][1,...] = xr.ufuncs.fabs(GPP_corr.weighted[1,...]) > \
xr.ufuncs.fabs(GPP_corr.weighted[0,...])
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14,6), constrained_layout=True,
subplot_kw=dict(projection=ccrs.Robinson()))
for index, ax in np.ndenumerate(axes):
# there are various ways to do this part, index in this case is a tuple (ie `(0, 0)`)
da = sv_mask.ranked.isel(sv_type=index[0])
map_function(da, ax=ax, title=sv_mask.sv_type[index[0]].values,
cmap='gist_earth_r')
plt.savefig('/glade/scratch/wwieder/svd/'+model+'_'+var[0]+'_vectors.eps')
GPP_corr.weighted.isel(sv=0).plot(robust=True);
# This selects amplification vectors
flip_corr = 0+(GPP_corr.ranked < 0.)
flip_corr[0,:,:] = flip_corr[0,:,:].where(sv_mask.ranked.isel(sv_type=0), 0) #only reverse amplitude vectors where needed
flip_corr[1,:,:] = flip_corr[1,:,:].where(sv_mask.ranked.isel(sv_type=1), 0)
#flip_corr.isel(sv=0).plot()
# Still need a better way to think about flipping early phase vectors where needed...
max_vector = svd0.vectors.argmax(dim='time')
min_vector = svd0.vectors.argmin(dim='time')
flip_vector = 0+(min_vector < max_vector)
flip_vector[0,:,:] = flip_vector[0,:,:].where(sv_mask.ranked.isel(sv_type=1),0)
flip_vector[1,:,:] = flip_vector[1,:,:].where(sv_mask.ranked.isel(sv_type=0),0)
flip_combined = flip_corr + flip_vector
flip_combined.isel(sv=0).plot()
svd2 = svd0.copy(deep=True) ## create new datasets that don't change the orig. results
for i in range(years):
svd2.vectors[i,...] = svd2.vectors[i,...].where(flip_combined[i,...] <= 0, -1*svd2.vectors[i,...])
svd2.weights[i,...] = svd2.weights[i,...].where(flip_combined[i,...] <= 0, -1*svd2.weights[i,...])
# add one more check: if amplification vectors have mean < 0, flip vectors & weights [maybe again...]
flip_amp = svd2.vectors.mean(dim='time') < 0.
flip_amp[0,:,:] = flip_amp[0,:,:].where(sv_mask.ranked.isel(sv_type=0),0)
flip_amp[1,:,:] = flip_amp[1,:,:].where(sv_mask.ranked.isel(sv_type=1),0)
flip_amp.isel(sv=0).plot(robust=True)
for i in range(years):
svd2.vectors[i,...] = svd2.vectors[i,...].where(flip_amp[i,...] <= 0, -1*svd2.vectors[i,...])
svd2.weights[i,...] = svd2.weights[i,...].where(flip_amp[i,...] <= 0, -1*svd2.weights[i,...])
# Now repeat the correlation between vectors and climatology
for i in range(years):
temp_vector = svd2.vectors.isel(sv=i)
GPP_corr.pearson[i,...] = pearson_correlation(svd0.climatology, temp_vector, 'time')
GPP_corr.weighted[i,...] = pearson_correlation_wgt(svd0.climatology, temp_vector, GPPwgt,'time')
GPP_corr.ranked[i,...] = spearman_correlation(svd0.climatology, temp_vector, 'time')
#GPP_corr.weighted[0,:,:].plot(robust=True);
# Plot First and Second Singular Vectors
# Made this long plotting routine into a function
def monthly_plots(ds_all, ds0, ds1, a, b, nA, nB, multi=0.5e4):
    """Two-panel plot of mean SV1/SV2 against the mean annual cycle, saved to scratch.

    Left panel: gridcells where SV1 = amplification; right panel: gridcells
    where SV1 = phase shift.  Removes the original's dead code (unused `ymax`,
    no-op `multi = multi`) and factors the duplicated panel code into one helper.

    Parameters
    ----------
    ds_all : dataset holding the regional-mean 'climatology' (12 months).
    ds0, ds1 : regional means over the amplification / phase-shift masks,
        each with 'vectors', 'varfrac' and 'theta'.
    a, b : min/max latitude of the band; b < 0 switches to SH month labels.
    nA, nB : gridcell counts annotated on each panel.
    multi : arbitrary scale factor to make the vectors visible on the plot.
    """
    fig = plt.figure(4, figsize=(6, 4.5))
    barwidth = 0.2
    month = np.arange(12)
    mm2plot = ds_all.climatology
    xpos = np.array(month) + 2 * barwidth
    # SH gridcells were re-indexed to a Jul-Jun hydrologic year upstream
    if b >= 0:
        xlabels = ['Jan', '', '', 'Apr', '', '', 'Jul', '', '', 'Oct', '', '']
    else:
        xlabels = ['Jul', '', '', 'Oct', '', '', 'Jan', '', '', 'Apr', '', '']

    def _sv_panel(pos, ds_i, n, label):
        '''Draw one panel: SV1 on the upper baseline (0.35), SV2 on the lower (-0.05).'''
        ax = fig.add_subplot(1, 2, pos)
        ax.set_xticklabels(xlabels)
        vectors2plot = ds_i.vectors * multi
        # gray curves: scaled climatology repeated behind each vector baseline
        ax.plot(xpos, mm2plot / (4 * max(mm2plot)) - 0.05, 'darkgray', linewidth=3)
        ax.plot(xpos, mm2plot / (4 * max(mm2plot)) + 0.35, 'darkgray', linewidth=3)
        ax.plot(xpos, vectors2plot[0, :] * 0.75 + 0.35, 'b-', linewidth=3)
        ax.plot(xpos, vectors2plot[1, :] * 0.75 - 0.05, 'r-', linewidth=3)
        ax.axhline(-0.05, color='black')
        ax.axhline(0.35, color='black')
        ax.set_xticks(xpos)
        ax.set_yticks([-0.05, 0.35])
        ax.set_yticklabels(['SV2', 'SV1'])
        ax.set_ylim([-0.2, 0.65])
        # annotate variance fraction, theta, and gridcell count for SV1
        ax.text(0.5, .52, str(int(ds_i.varfrac[0] * 100)) + "%\n$\Theta$=" + str('%.2f' % ds_i.theta[0]) +
                "\nn=" + str(n))
        ax.text(0.5, .15, str(int(ds_i.varfrac[1] * 100)) + "%\n$\Theta$=" + str('%.2f' % ds_i.theta[1]))
        ax.title.set_text(var[0] + " " + str(a) + "-" + str(b) + ", " + label)
        ax.text(4, -0.29, 'Month')
        return ax

    ax1 = _sv_panel(1, ds0, nA, "amplification")
    ax2 = _sv_panel(2, ds1, nB, "early phase")
    plt.savefig('/glade/scratch/wwieder/svd/'+model+'_SV_'+var[0]+'_lat_'+str(a)+'-'+str(b)+'.eps')
    # clear the panels so the next call to figure(4) starts clean
    ax1.remove()
    ax2.remove()
# calculate mean annual cycle and mean SV1 and SV2 for different regions
# TODO, JH, How do I save plots for each lat band individually?
# It would be nice to put these all in one figure & or visualize plots here
#a = [50, 20,-20,-50] # Min lat, -20 to 10 for Amazon
#b = [80, 50, 20, -20] # Max lat
a = [50, 20, 0,-20,-50] # Min lat, -20 to 10 for Amazon
b = [80, 50, 20, 0, -20] # Max lat
c = 0 # min lon, 280-310 for the Amazon
d = 360 # max lon
for i in range(len(a)):
svd_plot = svd2
mean_svd_plot = svd_plot.sel(lat=slice(a[i],b[i]), lon=slice(c,d)).mean(('lat','lon'))
# -- Amplitude vector --
mean_svd_plotA = svd_plot.where(sv_mask.ranked.isel(sv_type=0)).sel(lat=slice(a[i],b[i]), lon=slice(c,d)).mean(('lat','lon'))
countSetA = sv_mask.ranked.isel(sv_type=0).sel(lat=slice(a[i],b[i]), lon=slice(c,d))
nA = countSetA.where(countSetA>0).count().values
# -- Phase shift vector --
mean_svd_plotB = svd_plot.where(sv_mask.ranked.isel(sv_type=1)).sel(lat=slice(a[i],b[i]), lon=slice(c,d)).mean(('lat','lon'))
countSetB = sv_mask.ranked.isel(sv_type=1).sel(lat=slice(a[i],b[i]), lon=slice(c,d))
nB = countSetB.where(countSetB>0).count().values
monthly_plots(mean_svd_plot,mean_svd_plotA,mean_svd_plotB, a=a[i], b=b[i],nA=nA,nB=nB)
# [notebook output] <Figure size 432x324 with 0 Axes>
# Quick look: map the SV1 weights of svd0 for a single year (robust color scaling).
svd0.weights.isel(sv=0,year=2).plot(robust=True);
def dtrend(anom, dim):
    """Linearly detrend an xarray object along one dimension.

    Applies scipy.signal.detrend over *dim* via xr.apply_ufunc.
    apply_ufunc always moves core dimensions to the end, which is why
    detrend operates on axis=-1.

    Parameters
    ----------
    anom : xarray object to detrend (must be NaN-free for signal.detrend)
    dim  : name of the dimension to detrend along

    Returns
    -------
    Detrended object with the same dims/coords as *anom*.
    """
    detrended = xr.apply_ufunc(
        signal.detrend,
        anom,
        input_core_dims=[[dim]],
        output_core_dims=[[dim]],
        kwargs={'axis': -1},
    )
    return detrended
# Build dataset to hold results
dims = dict(zip(ds_anom.get(var[0]).dims, ds_anom.get(var[0]).shape))
out_shape = (dims['lat'], dims['lon'], 4, years)  # 4 = number of seasons
seasonID = xr.DataArray([12,3,6,9], dims='season') # seasonal time slices (start months of DJF/MAM/JJA/SON)
seasonName = xr.DataArray(['DJF','MAM','JJA','SON'], dims='season') # can coords be a string?
yearID = xr.DataArray(np.unique(ds_anom.time['time.year']), dims='year')
dt_anom = xr.Dataset( coords={"lat": ds_anom.lat, "lon": ds_anom.lon,
                              "season": seasonName, "year": yearID} )
# set up another dataset for raw data that gets detrended
dt_raw = xr.Dataset( coords={"lat": ds_anom.lat, "lon": ds_anom.lon,
                             "season": seasonName, "year": yearID} )
for v in range(len(var)):
    dt_anom[var[v]] = xr.DataArray(np.zeros(out_shape), dims=('lat', 'lon','season','year'))
    dt_raw[var[v]] = xr.DataArray(np.zeros(out_shape), dims=('lat', 'lon','season','year'))
    # Detrend anomalies and raw data; fillna(0) because signal.detrend cannot handle NaNs.
    dt = dtrend(ds_anom.get(var[v]).load().fillna(0), 'time')
    dt_r = dtrend(ds2.get(var[v]).load().fillna(0), 'time')
    # QS-DEC resampling gives one mean per meteorological season;
    # slice(0,-1) drops the last (presumably partial) season — TODO confirm
    dt_season = dt.resample(time="QS-DEC").mean().isel(time=slice(0,-1))
    dt_season_raw = dt_r.resample(time="QS-DEC").mean().isel(time=slice(0,-1))
    # now reshape: pull each season's time series out into the 'season' dimension
    for m in range(len(seasonID)):
        temp = dt_season.where(dt_season.time['time.month']==seasonID[m], drop=True)
        temp_raw = dt_season_raw.where(dt_season_raw.time['time.month']==seasonID[m], drop=True)
        dt_anom[var[v]][:,:,m,:] = temp
        dt_raw[var[v]][:,:,m,:] = temp_raw
dt_anom = dt_anom.transpose('season','year','lat','lon')
dt_raw = dt_raw.transpose( 'season','year','lat','lon')
dt_anom[var[0]].isel(season=2).mean(dim='year').plot(robust=True);
dt_SD = dt_raw.std(dim='year') # SD of detrended raw data across years, for each season
dt_anomSD = dt_anom.std(dim='year') # SD of detrended anomalies across years, for each season
print(dt_SD)
# [notebook output] <xarray.Dataset> Dimensions: (lat: 192, lon: 288, season: 4) Coordinates: * lat (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 87.17 88.12 89.06 90.0 * lon (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8 * season (season) object 'DJF' 'MAM' 'JJA' 'SON' Data variables: GPP (season, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 TBOT (season, lat, lon) float64 1.268 1.268 1.268 1.268 ... 0.0 0.0 0.0 SOILLIQ (season, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
# calculate z-score, which will normalize for differences in SD of raw data
# z = anomaly / sigma (here done for each season)
# NOTE: grid cells where dt_SD == 0 yield NaN (0/0) or inf here.
dt_Z = dt_anom / dt_SD
print(dt_Z)
# sanity-check map of one season/year of the GPP z-score
dt_Z.GPP.isel(season=0, year=21).plot(robust=True)
# [notebook output] <xarray.Dataset> Dimensions: (lat: 192, lon: 288, season: 4, year: 25) Coordinates: * lat (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 87.17 88.12 89.06 90.0 * lon (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8 * season (season) object 'DJF' 'MAM' 'JJA' 'SON' * year (year) int64 1990 1991 1992 1993 1994 ... 2010 2011 2012 2013 2014 Data variables: GPP (season, year, lat, lon) float64 nan nan nan nan ... nan nan nan TBOT (season, year, lat, lon) float64 0.0 0.0 0.0 0.0 ... nan nan nan SOILLIQ (season, year, lat, lon) float64 nan nan nan nan ... nan nan nan
# [notebook output] <matplotlib.collections.QuadMesh at 0x2ba73ccfa828>
# Results look at example from single grid cell, with
# GPP_sv weights ~ seasonal anomalies
# sv1 Early shift = spring & sv2 Amplification = summer
tlat = -5   # example latitude (degrees N; negative = Southern Hemisphere)
tlon = 300  # example longitude (degrees E)
if tlat>=0:
    # Northern Hemisphere cell: SV1 vs MAM (season=1), SV2 vs JJA (season=2)
    plt.plot(dt_anom[var[0]].sel(lat=tlat, lon=tlon, method='nearest').isel(season=1),
             svd2.weights.sel(lat=tlat, lon=tlon, method='nearest').isel(sv=0), '*', color='red')
    plt.plot(dt_anom[var[0]].sel(lat=tlat, lon=tlon, method='nearest').isel(season=2),
             svd2.weights.sel(lat=tlat, lon=tlon, method='nearest').isel(sv=1), '*', color='blue')
    plt.legend(loc='upper right',labels=("SV1 weights ~ MAM GPP anomalies",
                                         "SV2 weights ~ JJA GPP anomalies"));
else:
    # Southern Hemisphere cell: seasons shifted by half a year (SON, MAM)
    plt.plot(dt_anom[var[0]].sel(lat=tlat, lon=tlon, method='nearest').isel(season=3),
             svd2.weights.sel(lat=tlat, lon=tlon, method='nearest').isel(sv=0), '*', color='red')
    plt.plot(dt_anom[var[0]].sel(lat=tlat, lon=tlon, method='nearest').isel(season=1),
             svd2.weights.sel(lat=tlat, lon=tlon, method='nearest').isel(sv=1), '*', color='blue')
    plt.legend(loc='upper right',labels=("SV1 weights ~ SON GPP anomalies",
                                         "SV2 weights ~ MAM GPP anomalies"))
plt.xlabel(var[0]+" seasonal anomalies (gC/m2/s)")
plt.ylabel(var[0]+" SV weights");
#print(svd2.sel(lat=tlat, lon=tlon, method='nearest').isel(sv=0))
# Results look at example from single grid cell, with
# GPP_sv weights ~ z-score of seasonal anomalies
# sv1 Early shift = spring & sv2 Amplification = summer
tlat = -5   # example latitude (degrees N; negative = Southern Hemisphere)
tlon = 300  # example longitude (degrees E)
if tlat>=0:
    # Northern Hemisphere cell: SV1 vs MAM (season=1), SV2 vs JJA (season=2)
    plt.plot(dt_Z[var[0]].sel(lat=tlat, lon=tlon, method='nearest').isel(season=1),
             svd2.weights.sel(lat=tlat, lon=tlon, method='nearest').isel(sv=0), '*', color='red')
    plt.plot(dt_Z[var[0]].sel(lat=tlat, lon=tlon, method='nearest').isel(season=2),
             svd2.weights.sel(lat=tlat, lon=tlon, method='nearest').isel(sv=1), '*', color='blue')
    plt.legend(loc='upper left',labels=("SV1 weights ~ MAM GPP z-score",
                                        "SV2 weights ~ JJA GPP z-score"));
else:
    # Southern Hemisphere cell: seasons shifted by half a year (SON, MAM)
    plt.plot(dt_Z[var[0]].sel(lat=tlat, lon=tlon, method='nearest').isel(season=3),
             svd2.weights.sel(lat=tlat, lon=tlon, method='nearest').isel(sv=0), '*', color='red')
    plt.plot(dt_Z[var[0]].sel(lat=tlat, lon=tlon, method='nearest').isel(season=1),
             svd2.weights.sel(lat=tlat, lon=tlon, method='nearest').isel(sv=1), '*', color='blue')
    plt.legend(loc='upper left',labels=("SV1 weights ~ SON GPP z-score",
                                        "SV2 weights ~ MAM GPP z-score"))
plt.xlabel(var[0]+" seasonal z-score anomalies")
plt.ylabel(var[0]+" SV weights");
#print(svd2.sel(lat=tlat, lon=tlon, method='nearest').isel(sv=0))
# Write out correlation matrix for weights of GPP SV with detrended anomalies
# Loop over each SV (2), variable (3), & season (4)
def corr_wrap(anom, svd):
    """Correlate SV weights with seasonal anomalies for every variable.

    Parameters
    ----------
    anom : xr.Dataset of seasonal anomalies with dims (season, year, lat, lon)
    svd  : xr.Dataset containing 'weights' with an 'sv' and a 'year' dimension

    Returns
    -------
    xr.Dataset with one (sv, season, lat, lon) correlation array per variable.

    NOTE(review): relies on notebook globals `seasonName`, `var`, `ds`, and on
    the helper `pearson_correlation` defined earlier in the notebook — confirm
    they are in scope when this is called.
    """
    da = anom.to_array()
    dims = dict(zip(da.dims, da.shape))
    corr_shape = (2,dims['season'], dims['lat'], dims['lon'])
    # setup output variables
    corr = xr.Dataset( coords={'sv': range(2), 'season':seasonName,
                               "lat": ds.lat, "lon": ds.lon} )
    for v in range(len(var)): # variables
        corr[var[v]] = xr.DataArray(np.zeros(corr_shape), dims=('sv','season','lat', 'lon'))
        # loop through to calculate correlation coef.
        for i in range(2): # vectors
            for j in range(len(seasonName)): # seasons
                return_vals = pearson_correlation(anom.get(var[v]).isel(season=j),
                                                  svd['weights'].isel(sv=i), 'year')
                corr[var[v]][i,j,...] = return_vals
    return corr
# Create correlation matrix
# first make year dimensions identical so they align inside corr_wrap
svd0['year'] = dt_anom.get('year')
svd2['year'] = dt_anom.get('year')
#dt_Z['year'] = dt_anom.get('year')
svd0_corr = corr_wrap(dt_anom, svd0)
svd2_corr = corr_wrap(dt_anom, svd2)
svd2_Zcorr = corr_wrap(dt_Z, svd2)
# --- Check that results are sensible ---
# GPP SV1 correlated with JJA GPP anomalies in NH, as expected
# GPP SV1 correlated with MAM GPP anomalies in early shift regions (NE USA + Ag.)
svd2_corr[var[1]].isel(sv=0, season=3).plot(robust=True);
svd2_Zcorr[var[1]].isel(sv=0, season=3).plot(robust=True);
# NOTE: Weights do not necessarily have the same sign as GPP anomalies (e.g. see Amazon in plots above & below).
# xr.ufuncs.fabs calculates the absolute value for an xarray object
# NOTE(review): xr.ufuncs was removed in recent xarray releases; np.fabs(...)
# works directly on DataArrays — confirm the xarray version in use.
# argmax picks the index of the largest value across a dimension.
gppMask = svd0_corr[var[0]].max(dim='season')  # used only to re-mask all-NaN cells below
# season index (0-3) of the strongest |correlation| per (sv, lat, lon);
# fillna(-1) keeps argmax well-defined where correlations are missing
maxGPP_season = xr.ufuncs.fabs(svd0_corr[var[0]].fillna(-1)).argmax(dim='season').where(gppMask.notnull())
maxGPP_season.isel(sv=0).plot(robust=True);
# corr: svd['weights'] ~ TBOT and TWS anomalies
## simplify correlations to look at values only when
# weights from SV most strongly correlate with seasonal GPP anomalies
# 1) set up dataset to hold results
# NOTE(review): 'sv': range(years) looks suspect — the sv dimension is length 2
# everywhere else; was range(2) intended? Verify.
max_season_corr = xr.Dataset( coords={'sv': range(years) , "lat": ds.lat, "lon": ds.lon} )
max_season_SD = xr.Dataset( coords={"lat": ds.lat, "lon": ds.lon} )
max_season_wgt = xr.Dataset( coords={'year':dt_anom.year, "lat":dt_anom.lat, "lon":dt_anom.lon} )
max_season_Z = xr.Dataset( coords={'year':dt_anom.year, "lat":dt_anom.lat, "lon":dt_anom.lon} )
for i in range(len(var)):
    # NOTE(review): `corr_shape` and `yx_shape` are not defined in this cell
    # (corr_shape is a local inside corr_wrap) — they must exist at notebook
    # scope, and corr_shape must be 3-D here to match dims ('sv','lat','lon').
    max_season_corr[var[i]] = xr.DataArray(np.zeros(corr_shape), dims=('sv','lat', 'lon'))
    max_season_SD[var[i]] = xr.DataArray(np.zeros(yx_shape), dims=('lat', 'lon'))
    max_season_wgt[var[i]] = xr.DataArray(np.zeros((years, dims['lat'], dims['lon'])),
                                          dims=('year','lat', 'lon'))
    max_season_Z[var[i]] = xr.DataArray(np.zeros((years, dims['lat'], dims['lon'])),
                                        dims=('year','lat', 'lon'))
print(max_season_SD)
# 2) loop through vectors and seasons
# write out corr coefficients where maxGPP_season == True
for i in range(2): # vectors
    for j in range(len(seasonName)): # seasons
        for k in range(len(var)):
            # keep season j's correlation only where j is that cell's max-correlation season
            max_season_corr.get(var[k])[i,...] = svd2_corr.get(var[k])[i,j,...].where(maxGPP_season[i,...]==j,
                                                                                     max_season_corr.get(var[k])[i,...])
            # SD / weights / z-scores are keyed to SV1's (sv=0) max-correlation season;
            # these three assignments do not depend on i, so they repeat per vector
            max_season_SD[var[k]] = dt_anomSD.get(var[k])[j,...].where(maxGPP_season[0,...]==j,
                                                                      max_season_SD.get(var[k]))
            max_season_wgt[var[k]] = dt_anom.get(var[k])[j,...].where(maxGPP_season[0,...]==j,
                                                                     max_season_wgt.get(var[k]))
            max_season_Z[var[k]] = dt_Z.get(var[k])[j,...].where(maxGPP_season[0,...]==j,
                                                                max_season_Z.get(var[k]))
#convert C flux anomalies from gC/m2/s to gC/m2/d
max_season_wgt[var[0]] = max_season_wgt[var[0]] * 3600 * 24
max_season_SD[var[0]] = max_season_SD[var[0]] * 3600 * 24
max_season_SD[var[0]].plot();
# [notebook output] <xarray.Dataset> Dimensions: (lat: 192, lon: 288) Coordinates: * lat (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 87.17 88.12 89.06 90.0 * lon (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8 Data variables: GPP (lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 TBOT (lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 SOILLIQ (lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0
# use xarray facet plots for panels
# each row shows correlations between SV_1 weights and seasonal anomalies
for k in range(len(var)):
    simple = svd2_corr[var[k]].isel(sv=0).plot(x='lon', y='lat', col='season', col_wrap=4)
    # one figure per variable; filename encodes model and variable
    plt.savefig('/glade/scratch/wwieder/svd/'+model+'_'+var[k]+'multi_summary.eps')
print(dt_anomSD)
print(max_season_SD)
# [notebook output] <xarray.Dataset> Dimensions: (lat: 192, lon: 288, season: 4) Coordinates: * lat (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 87.17 88.12 89.06 90.0 * lon (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8 * season (season) object 'DJF' 'MAM' 'JJA' 'SON' Data variables: GPP (season, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 TBOT (season, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 SOILLIQ (season, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 <xarray.Dataset> Dimensions: (lat: 192, lon: 288) Coordinates: * lat (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 87.17 88.12 89.06 90.0 * lon (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8 season <U3 'DJF' sv int64 0 Data variables: GPP (lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 TBOT (lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 SOILLIQ (lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0
# -- first add season of max correlation of SV weights with GPP anomalies to dataset being plotted
max_season_corr['maxGPP_season'] = maxGPP_season
max_season_SD['maxGPP_season'] = maxGPP_season.isel(sv=0)
var2 = ['maxGPP_season']+var
# Now call the map_function: 2x2 figure, panel 0 = season of max correlation,
# panels 1-3 = correlation of SV1 weights with each variable's anomalies
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16,8), constrained_layout=True,
                         subplot_kw=dict(projection=ccrs.Robinson()))
i = 0
for index, ax in np.ndenumerate(axes):
    # there are various ways to do this part, index in this case is a tuple (ie `(0, 0)`)
    da = max_season_corr[var2[i]].isel(sv=0)
    if i == 0:
        # categorical colormap: 4 bins, one per season index 0-3
        #Can I make these arguments a list or a dictionary?
        cmap = plt.get_cmap("viridis",4)
        vmax=3.5
        vmin=-0.5
        title='month max correlation SVD1 weights & '+var[0]+' anomalies'
    else:
        cmap='RdBu_r'
        vmax=1
        vmin=-1
        title='correlation SV1 Weights ~ seasonal '+var2[i]+' anomalies'
    map_function(da, ax=ax, title=title, cmap=cmap, vmax=vmax, vmin=vmin,cb=1)
    i = i+1
# or to provide a common color bar, set vmax/min so same values and
#fig.colorbar(im,ax=axes.ravel().tolist(), shrink=0.5);
plt.savefig('/glade/scratch/wwieder/svd/'+model+'_'+var[0]+'_summary.eps')
# plot SD of GPP anomalies in season when max correlation between SV1 & GPP anomalies
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16,8), constrained_layout=True,
                         subplot_kw=dict(projection=ccrs.Robinson()))
i = 0
maxVals = [3.5 ,2.5,2.5,200] #arbitrary values for min/max, one pair per panel
minVals = [-0.5,0 ,0 ,0 ]
for index, ax in np.ndenumerate(axes):
    # there are various ways to do this part, index in this case is a tuple (ie `(0, 0)`)
    if i == 0:
        # panel 0: categorical map of the max-correlation season itself
        da = max_season_SD[var2[i]]#.where(max_season_corr[var2[i]].isel(sv=0))
        #Can I make these arguments a list or a dictionary?
        cmap = plt.get_cmap("viridis",4)
        #cmap = colors.ListedColormap(['r', 'g', 'b', 'c'])
        title='month max correlation SVD1 weights & '+var[0]+' anomalies'
    else:
        # remaining panels: SD of each variable, masked to cells with a valid SV1 correlation
        da = max_season_SD[var2[i]].where(max_season_corr[var2[i]].isel(sv=0))
        cmap='viridis'
        title=model+' SD '+var2[i]+' seasonal anomalies'
    vmax=maxVals[i]
    vmin=minVals[i]
    map_function(da, ax=ax, title=title, cmap=cmap, vmax=vmax, vmin=vmin,cb=1)
    i = i+1
plt.savefig('/glade/scratch/wwieder/svd/'+model+'_SD_anomalies.eps')
# Now call the map_function — same 2x2 layout as above, but masked to the
# sv_type=0 ("amplification") cells from sv_mask
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16,8), constrained_layout=True,
                         subplot_kw=dict(projection=ccrs.Robinson()))
i = 0
for index, ax in np.ndenumerate(axes):
    # there are various ways to do this part, index in this case is a tuple (ie `(0, 0)`)
    da = max_season_corr[var2[i]].isel(sv=0).where(sv_mask.ranked.isel(sv_type=0))
    if i == 0:
        #Can I make these arguments a list or a dictionary?
        cmap = plt.get_cmap("viridis",4)
        #cmap = colors.ListedColormap(['r', 'g', 'b', 'c'])
        vmax=3.5
        vmin=-0.5
        title='month max correlation SVD1 weights & '+var[0]+' anomalies'
    else:
        cmap='RdBu_r'
        vmax=1
        vmin=-1
        title='correlation SV1 Weights ~ seasonal '+var2[i]+' anomalies'
    map_function(da, ax=ax, title=title, cmap=cmap, vmax=vmax, vmin=vmin,cb=1)
    i = i+1
# Now call the map_function — same 2x2 layout again, masked to the
# sv_type=1 ("phase shift") cells from sv_mask
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16,8), constrained_layout=True,
                         subplot_kw=dict(projection=ccrs.Robinson()))
i = 0
for index, ax in np.ndenumerate(axes):
    # there are various ways to do this part, index in this case is a tuple (ie `(0, 0)`)
    da = max_season_corr[var2[i]].isel(sv=0).where(sv_mask.ranked.isel(sv_type=1))
    if i == 0:
        #Can I make these arguments a list or a dictionary?
        cmap = plt.get_cmap("viridis",4)
        #cmap = colors.ListedColormap(['r', 'g', 'b', 'c'])
        vmax=3.5
        vmin=-0.5
        title='month max correlation SVD1 weights & '+var[0]+' anomalies'
    else:
        cmap='RdBu_r'
        vmax=1
        vmin=-1
        title='correlation SV1 Weights ~ seasonal '+var2[i]+' anomalies'
    map_function(da, ax=ax, title=title, cmap=cmap, vmax=vmax, vmin=vmin,cb=1)
    i = i+1
# not sure how to get a sensible range of values here
# scatter GPP z-score against SOILLIQ (amplification cells) and TBOT (phase-shift cells)
max_season_Z.where(sv_mask.ranked.isel(sv_type=0)).plot.scatter(x='SOILLIQ',y='GPP');
max_season_Z.where(sv_mask.ranked.isel(sv_type=1)).plot.scatter(x='TBOT',y='GPP');
# ignore results from low weights
a = [50, 25,-25, -50] # Min lat, -20 to 10 for Amazon
b = [80, 50, 25,-20] # Max lat
# mask out cells whose SV1 weight magnitude is not above 0 (i.e. drop zero/NaN weights)
max_season_wgt2 = max_season_wgt.where(xr.ufuncs.fabs(svd2.weights.isel(sv=0)) > 0.0)
max_season_Z2 = max_season_Z.where(xr.ufuncs.fabs(svd2.weights.isel(sv=0)) > 0.0)
col = ('blue','black','green','purple','red')  # one color per latitude band
plt.figure(figsize=[16,5])
for i in range(2):  # i=0 amplification (vs SOILLIQ), i=1 early shift (vs TBOT)
    plt.subplot(1, 2, (1+i))
    for j in range(len(a)):
        #ds_in = max_season_wgt2.sel(lat=slice(a[j],b[j]))
        #ds_in = max_season_wgt2.where(sv_mask.ranked.isel(sv_type=i)).sel(lat=slice(a[j],b[j]))
        ds_in = max_season_Z2.where(sv_mask.ranked.isel(sv_type=i)).sel(lat=slice(a[j],b[j]))
        y = ds_in[var[0]].values.flatten()
        if i == 0:
            x = ds_in['SOILLIQ'].values.flatten()
            plt.xlabel('SOILLIQ z-score anomalies')
            plt.ylabel(var[0]+" z-score anomalies (amplification)")
        else:
            x = ds_in['TBOT'].values.flatten()
            plt.ylabel(var[0]+" z-score anomalies (early shift)")
            plt.xlabel('TBOT z-score anomalies')
        # NOTE(review): x and y are NaN-filtered independently — the pairing is
        # preserved only if both variables share an identical NaN mask; verify.
        y = y[~np.isnan(y)]
        x = x[~np.isnan(x)]
        plt.plot(x, y, ',',color=col[j])#, alpha=(1/(j+2)))
        # overlay a least-squares linear fit for this band
        plt.plot(np.unique(x), np.poly1d(np.polyfit(x, y, 1))(np.unique(x)),color=col[j])
    plt.ylim(-5,5)
    if i == 0:
        # empty strings skip the point-cloud artists so only fit lines are labeled
        plt.legend(('','Arctic','','NH Mid','','Tropics','','SH Mid'),loc=2,frameon=False)
plt.subplots_adjust(wspace=0.2)
plt.savefig('/glade/scratch/wwieder/svd/'+model+'_Zcorr.eps')
# early shift may have stronger correlation soil moisture z-score in tropics and SH
# Same figure as above but with the predictor variables swapped:
# amplification vs TBOT, early shift vs SOILLIQ.
col = ('blue','black','green','purple','red')  # one color per latitude band
plt.figure(figsize=[16,5])
for i in range(2):
    plt.subplot(1, 2, (1+i))
    for j in range(len(a)):
        #ds_in = max_season_wgt2.sel(lat=slice(a[j],b[j]))
        #ds_in = max_season_wgt2.where(sv_mask.ranked.isel(sv_type=i)).sel(lat=slice(a[j],b[j]))
        ds_in = max_season_Z2.where(sv_mask.ranked.isel(sv_type=i)).sel(lat=slice(a[j],b[j]))
        y = ds_in[var[0]].values.flatten()
        if i == 0:
            x = ds_in['TBOT'].values.flatten()
            plt.xlabel('TBOT z-score anomalies')
            plt.ylabel(var[0]+" z-score anomalies (amplification)")
        else:
            x = ds_in['SOILLIQ'].values.flatten()
            plt.ylabel(var[0]+" z-score anomalies (early shift)")
            plt.xlabel('SOILLIQ z-score anomalies')
        # NOTE(review): independent NaN filtering of x and y — see note above; verify masks match.
        y = y[~np.isnan(y)]
        x = x[~np.isnan(x)]
        plt.plot(x, y, ',',color=col[j])#, alpha=(1/(j+2)))
        plt.plot(np.unique(x), np.poly1d(np.polyfit(x, y, 1))(np.unique(x)),color=col[j])
    plt.ylim(-5,5)
    if i == 0:
        plt.legend(('','Arctic','','NH Mid','','Tropics','','SH Mid'),loc=2,frameon=False)
plt.subplots_adjust(wspace=0.2)
# 2-D histograms of GPP z-scores vs SOILLIQ (amplification) / TBOT (early shift)
plt.figure(figsize=[16,5])
for i in range(2):
    plt.subplot(1, 2, (1+i))
    #ds_in = max_season_wgt2.sel(lat=slice(a[j],b[j]))
    ds_in = max_season_Z2.where(sv_mask.ranked.isel(sv_type=i))
    # clamp strongly negative GPP z-scores at -2 before binning
    ds_in['GPP'] = xr.where(ds_in['GPP']< -2, -2, ds_in['GPP'])
    y = ds_in[var[0]].values.flatten()
    if i == 0:
        x = ds_in['SOILLIQ'].values.flatten()
        plt.xlabel('SOILLIQ anomalies')
        plt.ylabel(var[0]+" anomalies (amplification)")
        Xrange = [[-4, 4], [-4, 4]]
    else:
        x = ds_in['TBOT'].values.flatten()
        plt.ylabel(var[0]+" anomalies (early shift)")
        plt.xlabel('TBOT anomalies')
        Xrange = [[-4, 4], [-4, 4]]
    # NOTE(review): hist2d needs x and y the same length — independent NaN
    # filtering below only works if both variables share the same NaN mask; verify.
    y = y[~np.isnan(y)]
    x = x[~np.isnan(x)]
    #plt.plot(x, y, ',',color=col[j])#, alpha=(1/(j+2)))
    plt.hist2d(x, y, bins=10, range=Xrange)#, alpha=(1/(j+2)))
# facet map of the masked GPP z-scores, one panel per year
simple = max_season_Z2.GPP.plot(x='lon', y='lat', col='year', col_wrap=9, robust=True);
# output = slope, intercept, r, p, SE
def reg3d(x, y):
    """Grid-cell-wise linear regression of *y* on *x* along the 'year' dimension.

    Wraps scipy.stats.linregress with xr.apply_ufunc (vectorize=True), so the
    regression runs independently at every (lat, lon) point.

    Returns
    -------
    Tuple of five arrays: slope, intercept, r, p-value, standard error.
    """
    return xr.apply_ufunc(
        scipy.stats.linregress, x, y,
        input_core_dims=[['year'], ['year']],
        output_core_dims=[[], [], [], [], []],
        vectorize=True,
        dataset_join='exact',
    )
# Assemble regression results: dims are (type: anomaly/z-score, lin_reg: 5 stats, lat, lon)
reg_shape = (2,5, ) + yx_shape
types = ['anomaly','z-score']
lin_out = ['slope', 'intercept', 'r','p', 'SE']
reg_out = xr.Dataset( coords={'type':types,'lin_reg':lin_out, "lat": ds.lat, "lon": ds.lon} )
for i in range(2):
    # regress GPP (var[0]) on each predictor variable (var[1], var[2])
    reg_out[var[(i+1)]] = xr.DataArray(np.zeros(reg_shape), dims=('type','lin_reg','lat', 'lon'))
    temp0 = reg3d(max_season_wgt.get(var[(i+1)]),max_season_wgt.get(var[0]))  # anomalies
    temp1 = reg3d(max_season_Z.get(var[(i+1)]),max_season_Z.get(var[0]))      # z-scores
    # not sure how to get series of 5 arrays into dataset, but this works
    for j in range(5):
        reg_out[var[(i+1)]][0,j,...] = temp0[j]
        reg_out[var[(i+1)]][1,j,...] = temp1[j]
print(reg_out)
# [notebook output] <xarray.Dataset> Dimensions: (lat: 192, lin_reg: 5, lon: 288, type: 2) Coordinates: * lat (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 87.17 88.12 89.06 90.0 * lin_reg (lin_reg) object 'slope' 'intercept' 'r' 'p' 'SE' * lon (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8 * type (type) object 'anomaly' 'z-score' Data variables: TBOT (type, lin_reg, lat, lon) float64 nan nan nan nan ... nan nan nan SOILLIQ (type, lin_reg, lat, lon) float64 nan nan nan nan ... nan nan nan
# keep regression results only where the p-value (lin_reg index 3) < 0.05
reg_outSig = reg_out.where(reg_out.isel(lin_reg=3)<0.05)
#reg_outSig.TBOT.isel(type=0, lin_reg=0).plot(robust=True);
# 2x2 slope maps: rows = anomaly / z-score, columns = TBOT / SOILLIQ
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16,8), constrained_layout=True,
                         subplot_kw=dict(projection=ccrs.Robinson()))
imax = [0.5, 0.02]   # per-column color limits for the anomaly-slope row
imin = [-0.5, -0.02]
for index, ax in np.ndenumerate(axes):
    i = index[0]  # row: 0 = anomaly, 1 = z-score
    j = index[1]  # column: 0 = TBOT, 1 = SOILLIQ
    da = reg_outSig[var[(j+1)]].isel(lin_reg=0, type=i)
    title='slope GPP ~ '+var[(j+1)]+' '+types[i]
    if j == 0:
        cmap='RdBu_r'
    else:
        cmap='BrBG'
    if i == 0:
        vmax=imax[j]
        vmin=imin[j]
    else:
        # z-score slopes share one fixed range
        vmax=1.5
        vmin=-1.5
    map_function(da, ax=ax, title=title, cmap=cmap, vmax=vmax, vmin=vmin,cb=1)
plt.savefig('/glade/scratch/wwieder/svd/'+model+'_regressionSlope.pdf')
print(cmap)
# [notebook output] BrBG