import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import netCDF4 as nc
import datetime as dt
from salishsea_tools import evaltools as et, viz_tools
import gsw
import matplotlib.gridspec as gridspec
import matplotlib as mpl
import matplotlib.dates as mdates
import cmocean as cmo
import scipy.interpolate as sinterp
import pickle
import cmocean
import json
import f90nml
from collections import OrderedDict
fs=16
mpl.rc('xtick', labelsize=fs)
mpl.rc('ytick', labelsize=fs)
mpl.rc('legend', fontsize=fs)
mpl.rc('axes', titlesize=fs)
mpl.rc('axes', labelsize=fs)
mpl.rc('figure', titlesize=fs)
mpl.rc('font', size=fs)
mpl.rc('font', family='sans-serif', weight='normal', style='normal')
import warnings
#warnings.filterwarnings('ignore')
from IPython.display import Markdown, display
%matplotlib inline
year=2010
PATH= '/results2/SalishSea/nowcast-green.201905/'
datadir='/ocean/eolson/MEOPAR/obs/WADE/ptools_data/ecology'
display(Markdown('''## Year: '''+ str(year)))
display(Markdown('''### Model output: '''+ PATH))
dfSta=pickle.load(open(os.path.join(datadir,'sta_df.p'),'rb'))
dfSta.head()
Desig | Descrip | Basin | Max_Depth | Latitude | Longitude | |
---|---|---|---|---|---|---|
Station | ||||||
ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 |
ADM002 | C | Admiralty Inlet (north) - Quimper Pn. | Admiralty Inlet | 79 | 48.187318 | -122.842950 |
ADM003 | C | Admiralty Inlet (south) | Admiralty Inlet | 118 | 47.878983 | -122.483195 |
BLL009 | C | Bellingham Bay - Pt. Frances | Strait of Georgia | 31 | 48.685940 | -122.599618 |
BUD005 | C | Budd Inlet - Olympia Shoal | South Basin | 22 | 47.092040 | -122.918197 |
dfCTD0=pickle.load(open(os.path.join(datadir,f'Casts_{str(year)}.p'),'rb'))
dfCTD0.head()
Salinity | Temperature | Sigma | Chl | DO | Turb | Z | Station | Date | |
---|---|---|---|---|---|---|---|---|---|
0 | 30.257401 | 8.1037 | 23.537201 | 0.614771 | 8.010357 | 0.674165 | -116.5 | ADM001 | 2010-01-21 |
1 | 30.256701 | 8.1038 | 23.536600 | 0.657414 | 7.978440 | 0.707311 | -116.0 | ADM001 | 2010-01-21 |
2 | 30.256100 | 8.1039 | 23.536100 | 0.657414 | 7.990206 | 0.708538 | -115.5 | ADM001 | 2010-01-21 |
3 | 30.255501 | 8.1041 | 23.535601 | 0.643200 | 7.993524 | 0.704281 | -115.0 | ADM001 | 2010-01-21 |
4 | 30.255400 | 8.1041 | 23.535601 | 0.614771 | 7.994764 | 0.691650 | -114.5 | ADM001 | 2010-01-21 |
dfCTD=pd.merge(left=dfSta,right=dfCTD0,how='right',
left_on='Station',right_on='Station')
#right join means all rows in right table (dfCTD) are included in output
dfCTD.head()
Station | Desig | Descrip | Basin | Max_Depth | Latitude | Longitude | Salinity | Temperature | Sigma | Chl | DO | Turb | Z | Date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 30.257401 | 8.1037 | 23.537201 | 0.614771 | 8.010357 | 0.674165 | -116.5 | 2010-01-21 |
1 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 30.256701 | 8.1038 | 23.536600 | 0.657414 | 7.978440 | 0.707311 | -116.0 | 2010-01-21 |
2 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 30.256100 | 8.1039 | 23.536100 | 0.657414 | 7.990206 | 0.708538 | -115.5 | 2010-01-21 |
3 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 30.255501 | 8.1041 | 23.535601 | 0.643200 | 7.993524 | 0.704281 | -115.0 | 2010-01-21 |
4 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 30.255400 | 8.1041 | 23.535601 | 0.614771 | 7.994764 | 0.691650 | -114.5 | 2010-01-21 |
# check that there are no stations without lat and lon:
dfCTD.loc[pd.isnull(dfCTD['Latitude'])]
Station | Desig | Descrip | Basin | Max_Depth | Latitude | Longitude | Salinity | Temperature | Sigma | Chl | DO | Turb | Z | Date |
---|
# check one to one matches:
len(dfCTD),len(dfCTD0), len(dfSta)
(54503, 54503, 39)
# where no time is provided, set time to midday Pacific time = ~ 20:00 UTC for now
# (most sampling takes place during the day)
# accurate times will be provided at a later date
# the code below takes advantage of all elements in 'Date' having a time component
# set to midnight
dfCTD['dtUTC']=[iiD+dt.timedelta(hours=20) for iiD in dfCTD['Date']]
# We require the following columns:
# dtUTC datetime
# Lat Latitude
# Lon Longitude
# Z Depth, increasing downward (positive)
dfCTD.rename(columns={'Latitude':'Lat','Longitude':'Lon'},inplace=True)
dfCTD['Z']=-1*dfCTD['Z']
dfCTD.head()
Station | Desig | Descrip | Basin | Max_Depth | Lat | Lon | Salinity | Temperature | Sigma | Chl | DO | Turb | Z | Date | dtUTC | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 30.257401 | 8.1037 | 23.537201 | 0.614771 | 8.010357 | 0.674165 | 116.5 | 2010-01-21 | 2010-01-21 20:00:00 |
1 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 30.256701 | 8.1038 | 23.536600 | 0.657414 | 7.978440 | 0.707311 | 116.0 | 2010-01-21 | 2010-01-21 20:00:00 |
2 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 30.256100 | 8.1039 | 23.536100 | 0.657414 | 7.990206 | 0.708538 | 115.5 | 2010-01-21 | 2010-01-21 20:00:00 |
3 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 30.255501 | 8.1041 | 23.535601 | 0.643200 | 7.993524 | 0.704281 | 115.0 | 2010-01-21 | 2010-01-21 20:00:00 |
4 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 30.255400 | 8.1041 | 23.535601 | 0.614771 | 7.994764 | 0.691650 | 114.5 | 2010-01-21 | 2010-01-21 20:00:00 |
# Calculate Absolute (Reference) Salinity (g/kg) and Conservative Temperature (deg C) from
# Salinity (psu) and Temperature (deg C):
press=gsw.p_from_z(-1*dfCTD['Z'].values,dfCTD['Lat'].values)
dfCTD['SA']=gsw.SA_from_SP(dfCTD['Salinity'].values,press,
dfCTD['Lon'].values,dfCTD['Lat'].values)
dfCTD['CT']=gsw.CT_from_t(dfCTD['SA'].values,dfCTD['Temperature'].values,press)
print(len(dfCTD),'data points')
print('Number of data points in each region:')
dfCTD.groupby('Basin')['SA'].count()
54503 data points Number of data points in each region:
Basin Admiralty Inlet 8038 Grays Harbor 336 Hood Canal Basin 3114 Main Basin 11121 South Basin 9385 Strait of Georgia 3370 Strait of Juan de Fuca 9189 Whidbey Basin 5022 Willapa Bay 716 Name: SA, dtype: int64
# start_date and end_date are the first and last dates that will
# be included in the matched data set
start_date = dt.datetime(year,1,1)
end_date = dt.datetime(year,12,31)
flen=1 # number of days per model output file. always 1 for 201905 and 201812 model runs
namfmt='nowcast' # for 201905 and 201812 model runs, this should always be 'nowcast'
# filemap is dictionary of the form variableName: fileType, where variableName is the name
# of the variable you want to extract and fileType designates the type of
# model output file it can be found in (usually ptrc_T for biology, grid_T for temperature and
# salinity)
filemap={'vosaline':'grid_T','votemper':'grid_T'}
# fdict is a dictionary mappy file type to its time resolution. Here, 1 means hourly output
# (1h file) and 24 means daily output (1d file). In certain runs, multiple time resolutions
# are available
fdict={'ptrc_T':1,'grid_T':1}
# Note: to switch between 201812 and 201905 model results, change PATH
# to switch from hourly to daily model output, change fdict values from 1 to 24 (but daily
# files are not available for some runs and file types)
data=et.matchData(dfCTD,filemap,fdict,start_date,end_date,'nowcast',PATH,1,quiet=False);
(Lat,Lon)= 46.54537666666667 -123.98016166666666 not matched to domain (Lat,Lon)= 46.644 -123.993 not matched to domain (Lat,Lon)= 46.68676333333333 -123.9735 not matched to domain (Lat,Lon)= 46.68732166666667 -123.74988166666667 not matched to domain (Lat,Lon)= 46.703986666666665 -123.837385 not matched to domain (Lat,Lon)= 46.937313333333336 -123.91322333333333 not matched to domain (Lat,Lon)= 46.953421666666664 -124.09295 not matched to domain (Lat,Lon)= 46.97787 -123.78461 not matched to domain (Lat,Lon)= 47.21342666666666 -123.07765 not matched to domain progress: 0.0% progress: 9.40043994058922% progress: 18.80087988117844% progress: 28.201319821767655% progress: 37.60175976235688% progress: 47.00219970294609% progress: 56.40263964353531% progress: 65.80307958412453% progress: 75.20351952471376% progress: 84.60395946530298% progress: 94.00439940589219%
cm1=cmocean.cm.thermal
with nc.Dataset('/data/eolson/results/MEOPAR/NEMO-forcing-new/grid/bathymetry_201702.nc') as bathy:
bathylon=np.copy(bathy.variables['nav_lon'][:,:])
bathylat=np.copy(bathy.variables['nav_lat'][:,:])
bathyZ=np.copy(bathy.variables['Bathymetry'][:,:])
<ipython-input-19-91c5eca2ab2e>:3: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations bathylon=np.copy(bathy.variables['nav_lon'][:,:]) <ipython-input-19-91c5eca2ab2e>:4: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations bathylat=np.copy(bathy.variables['nav_lat'][:,:]) <ipython-input-19-91c5eca2ab2e>:5: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations bathyZ=np.copy(bathy.variables['Bathymetry'][:,:])
fig, ax = plt.subplots(1,1,figsize = (6,6))
with nc.Dataset('/data/vdo/MEOPAR/NEMO-forcing/grid/bathymetry_201702.nc') as grid:
viz_tools.plot_coastline(ax, grid, coords = 'map',isobath=.1)
colors=('teal','green','firebrick','darkorange','darkviolet','fuchsia',
'royalblue','darkgoldenrod','mediumspringgreen','deepskyblue')
datreg=dict()
for ind, iregion in enumerate(data.Basin.unique()):
datreg[iregion] = data.loc[data.Basin==iregion]
ax.plot(datreg[iregion]['Lon'], datreg[iregion]['Lat'],'.',
color = colors[ind], label=iregion)
ax.set_ylim(47, 49)
ax.legend(bbox_to_anchor=[1,.6,0,0])
ax.set_xlim(-124, -122);
ax.set_title('Observation Locations');
iz=(data.Z<15)
JFM=data.loc[iz&(data.dtUTC<=dt.datetime(year,4,1)),:]
Apr=data.loc[iz&(data.dtUTC<=dt.datetime(year,5,1))&(data.dtUTC>dt.datetime(year,4,1)),:]
MJJA=data.loc[iz&(data.dtUTC<=dt.datetime(year,9,1))&(data.dtUTC>dt.datetime(year,5,1)),:]
SOND=data.loc[iz&(data.dtUTC>dt.datetime(year,9,1)),:]
def byDepth(ax,obsvar,modvar,lims):
ps=et.varvarPlot(ax,data,obsvar,modvar,'Z',(15,22),'z','m',('mediumseagreen','darkturquoise','navy'))
l=ax.legend(handles=ps)
ax.set_xlabel('Obs')
ax.set_ylabel('Model')
ax.plot(lims,lims,'k-',alpha=.5)
ax.set_xlim(lims)
ax.set_ylim(lims)
ax.set_aspect(1)
return ps,l
def byRegion(ax,obsvar,modvar,lims):
ps=[]
for ind, iregion in enumerate(data.Basin.unique()):
ax.plot(datreg[iregion]['Lon'], datreg[iregion]['Lat'],'.',
color = colors[ind], label=iregion)
ps0=et.varvarPlot(ax,datreg[iregion],obsvar,modvar,
cols=(colors[ind],),lname=iregion)
ps.append(ps0)
l=ax.legend(handles=[ip[0][0] for ip in ps])
ax.set_xlabel('Obs')
ax.set_ylabel('Model')
ax.plot(lims,lims,'k-',alpha=.5)
ax.set_xlim(lims)
ax.set_ylim(lims)
ax.set_aspect(1)
return ps,l
def bySeason(ax,obsvar,modvar,lims):
for axi in ax:
axi.plot(lims,lims,'k-')
axi.set_xlim(lims)
axi.set_ylim(lims)
axi.set_aspect(1)
axi.set_xlabel('Obs')
axi.set_ylabel('Model')
ps=et.varvarPlot(ax[0],JFM,obsvar,modvar,cols=('crimson','darkturquoise','navy'))
ax[0].set_title('Jan-Mar')
ps=et.varvarPlot(ax[1],Apr,obsvar,modvar,cols=('crimson','darkturquoise','navy'))
ax[1].set_title('Apr')
ps=et.varvarPlot(ax[2],MJJA,obsvar,modvar,cols=('crimson','darkturquoise','navy'))
ax[2].set_title('May-Aug')
ps=et.varvarPlot(ax[3],SOND,obsvar,modvar,cols=('crimson','darkturquoise','navy'))
ax[3].set_title('Sep-Dec')
return
def ErrErr(fig,ax,obsvar1,modvar1,obsvar2,modvar2,lims1,lims2):
m=ax.scatter(data[modvar1]-data[obsvar1],data[modvar2]-data[obsvar2],c=data['Z'],s=1,cmap='gnuplot')
cb=fig.colorbar(m,ax=ax,label='Depth (m)')
ax.set_xlim(lims1)
ax.set_ylim(lims2)
ax.set_aspect((lims1[1]-lims1[0])/(lims2[1]-lims2[0]))
return m,cb
### These groupings will be used to calculate statistics. The keys are labels and
### the values are corresponding dataframe views
statsubs=OrderedDict({'z < 15 m':data.loc[data.Z<15],
'15 m < z < 22 m':data.loc[(data.Z>=15)&(data.Z<22)],
'z >= 22 m':data.loc[data.Z>=22],
'z > 50 m':data.loc[data.Z>50],
'all':data,
'z < 15 m, JFM':JFM,
'z < 15 m, Apr':Apr,
'z < 15 m, MJJA':MJJA,
'z < 15 m, SOND': SOND,})
for iregion in data.Basin.unique():
statsubs[iregion]=datreg[iregion]
statsubs.keys()
odict_keys(['z < 15 m', '15 m < z < 22 m', 'z >= 22 m', 'z > 50 m', 'all', 'z < 15 m, JFM', 'z < 15 m, Apr', 'z < 15 m, MJJA', 'z < 15 m, SOND', 'Main Basin', 'Hood Canal Basin', 'South Basin', 'Admiralty Inlet', 'Whidbey Basin', 'Strait of Georgia', 'Strait of Juan de Fuca'])
obsvar='SA'
modvar='mod_vosaline'
statsDict={year:dict()}
statsDict[year]['SA']=OrderedDict()
for isub in statsubs:
print(isub)
statsDict[year]['SA'][isub]=dict()
var=statsDict[year]['SA'][isub]
var['N'],mmean,omean,var['Bias'],var['RMSE'],var['WSS']=et.stats(statsubs[isub].loc[:,[obsvar]],
statsubs[isub].loc[:,[modvar]])
tbl,tdf=et.displayStats(statsDict[year]['SA'],level='Subset',suborder=list(statsubs.keys()))
tbl
z < 15 m 15 m < z < 22 m z >= 22 m z > 50 m all z < 15 m, JFM z < 15 m, Apr z < 15 m, MJJA z < 15 m, SOND Main Basin Hood Canal Basin South Basin Admiralty Inlet Whidbey Basin Strait of Georgia Strait of Juan de Fuca
Bias | N | RMSE | WSS | ||
---|---|---|---|---|---|
Subset | |||||
0 | z < 15 m | -0.517532 | 7521.000000 | 1.314116 | 0.886112 |
1 | 15 m < z < 22 m | -0.323539 | 3310.000000 | 0.570391 | 0.895162 |
2 | z >= 22 m | -0.046226 | 36988.000000 | 0.558611 | 0.929680 |
3 | z > 50 m | 0.023852 | 25238.000000 | 0.614428 | 0.921299 |
4 | all | -0.139548 | 47819.000000 | 0.731776 | 0.925733 |
5 | z < 15 m, JFM | -0.412417 | 1914.000000 | 1.407758 | 0.890701 |
6 | z < 15 m, Apr | -0.744317 | 705.000000 | 1.214579 | 0.839706 |
7 | z < 15 m, MJJA | -0.678849 | 2528.000000 | 1.457054 | 0.873306 |
8 | z < 15 m, SOND | -0.363150 | 2374.000000 | 1.083675 | 0.881554 |
9 | Main Basin | -0.051193 | 11081.000000 | 0.583447 | 0.839376 |
10 | Hood Canal Basin | -0.193592 | 2929.000000 | 0.843410 | 0.928556 |
11 | South Basin | -0.663957 | 8826.000000 | 1.160087 | 0.467400 |
12 | Admiralty Inlet | 0.050495 | 8008.000000 | 0.358007 | 0.945518 |
13 | Whidbey Basin | -0.014365 | 4886.000000 | 0.977866 | 0.930045 |
14 | Strait of Georgia | 0.013351 | 3264.000000 | 0.606138 | 0.906268 |
15 | Strait of Juan de Fuca | -0.006395 | 8825.000000 | 0.332482 | 0.954166 |
fig, ax = plt.subplots(1,2,figsize = (16,7))
ps,l=byDepth(ax[0],obsvar,modvar,(0,40))
ax[0].set_title('S$_A$ (g kg$^{-1}$) By Depth')
ps,l=byRegion(ax[1],obsvar,modvar,(0,40))
ax[1].set_title('S$_A$ (g kg$^{-1}$) By Region');
fig, ax = plt.subplots(1,4,figsize = (16,3.3))
bySeason(ax,obsvar,modvar,(0,30))
fig,ax=plt.subplots(1,1,figsize=(20,.3))
ax.plot(data.dtUTC,np.ones(np.shape(data.dtUTC)),'k.')
ax.set_xlim((dt.datetime(year,1,1),dt.datetime(year,12,31)))
ax.set_title('Data Timing')
ax.yaxis.set_visible(False)
obsvar='CT'
modvar='mod_votemper'
statsDict[year]['CT']=OrderedDict()
for isub in statsubs:
statsDict[year]['CT'][isub]=dict()
var=statsDict[year]['CT'][isub]
var['N'],mmean,omean,var['Bias'],var['RMSE'],var['WSS']=et.stats(statsubs[isub].loc[:,[obsvar]],
statsubs[isub].loc[:,[modvar]])
tbl,tdf=et.displayStats(statsDict[year]['CT'],level='Subset',suborder=list(statsubs.keys()))
tbl
mv=(0,80)
fig, ax = plt.subplots(1,2,figsize = (16,7))
ps,l=byDepth(ax[0],obsvar,modvar,mv)
ax[0].set_title('$\Theta$ ($^{\circ}$C) By Depth')
ps,l=byRegion(ax[1],obsvar,modvar,mv)
ax[1].set_title('$\Theta$ ($^{\circ}$C) By Region');
fig, ax = plt.subplots(1,4,figsize = (16,3.3))
bySeason(ax,obsvar,modvar,mv)
fig,ax=plt.subplots(1,1,figsize=(20,.3))
ax.plot(data.dtUTC,np.ones(np.shape(data.dtUTC)),'k.')
ax.set_xlim((dt.datetime(year,1,1),dt.datetime(year,12,31)))
ax.set_title('Data Timing')
ax.yaxis.set_visible(False)
### Temperature-Salinity by Region
def tsplot(ax,svar,tvar):
limsS=(0,36)
limsT=(5,20)
ss,tt=np.meshgrid(np.linspace(limsS[0],limsS[1],20),np.linspace(limsT[0],limsT[1],20))
rho=gsw.rho(ss,tt,np.zeros(np.shape(ss)))
r=ax.contour(ss,tt,rho,colors='k')
ps=list()
for ind, iregion in enumerate(data.Basin.unique()):
p=ax.plot(datreg[iregion][svar], datreg[iregion][tvar],'.',
color = colors[ind], label=iregion)
ps.append(p[0])
l=ax.legend(handles=ps,bbox_to_anchor=(1.01,1))
ax.set_ylim(limsT)
ax.set_xlim(limsS)
ax.set_ylabel('$\Theta$ ($^{\circ}$C)')
ax.set_xlabel('S$_A$ (g kg$^{-1}$)')
ax.set_aspect((limsS[1]-limsS[0])/(limsT[1]-limsT[0]))
return
fig,ax=plt.subplots(1,2,figsize=(16,3.5))
tsplot(ax[0],'SA','CT')
ax[0].set_title('Observed')
tsplot(ax[1],'mod_vosaline','mod_votemper')
ax[1].set_title('Modelled')
tbl,tdf=et.displayStats(statsDict[year],level='Variable',suborder=list(statsubs.keys()))
tbl