This script exists only as a place to load all of the pickle files while I am busy doing other things.
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import netCDF4 as nc
import datetime as dt
from salishsea_tools import evaltools as et, viz_tools
import gsw
import matplotlib.gridspec as gridspec
import matplotlib as mpl
import matplotlib.dates as mdates
import scipy.interpolate as sinterp
import pickle
import cmocean
import json
import f90nml
from collections import OrderedDict
from matplotlib.colors import LogNorm
fs=16
mpl.rc('xtick', labelsize=fs)
mpl.rc('ytick', labelsize=fs)
mpl.rc('legend', fontsize=fs)
mpl.rc('axes', titlesize=fs)
mpl.rc('axes', labelsize=fs)
mpl.rc('figure', titlesize=fs)
mpl.rc('font', size=fs)
mpl.rc('font', family='sans-serif', weight='normal', style='normal')
import warnings
#warnings.filterwarnings('ignore')
from IPython.display import Markdown, display
%matplotlib inline
year=2019
modelversion='nowcast-green.201905'
PATH= '/results2/SalishSea/nowcast-green.201905/'
datadir='/ocean/eolson/MEOPAR/obs/WADE/ptools_data/ecology'
dfSta=pickle.load(open(os.path.join(datadir,'sta_df.p'),'rb'))
dfSta.head()
| Station | Desig | Descrip | Basin | Max_Depth | Latitude | Longitude |
|---|---|---|---|---|---|---|
| ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 |
| ADM002 | C | Admiralty Inlet (north) - Quimper Pn. | Admiralty Inlet | 79 | 48.187318 | -122.842950 |
| ADM003 | C | Admiralty Inlet (south) | Admiralty Inlet | 118 | 47.878983 | -122.483195 |
| BLL009 | C | Bellingham Bay - Pt. Frances | Strait of Georgia | 31 | 48.685940 | -122.599618 |
| BUD005 | C | Budd Inlet - Olympia Shoal | South Basin | 22 | 47.092040 | -122.918197 |
dfBot=pickle.load(open(os.path.join(datadir,f'Bottles_{year}.p'),'rb'))
dfBot.head()
| | Station | Date | UTCDateTime | Z | PO4(uM)D | SiOH4(uM)D | NO3(uM)D | NO2(uM)D | NH4(uM)D | DIN | Znom |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ADM001 | 2019-01-08 | 2019-01-08 19:27:00 | -30.119 | 2.2138 | 52.2913 | 26.2228 | 0.1347 | 0.0000 | 26.3575 | -30.0 |
| 1 | ADM001 | 2019-01-08 | 2019-01-08 19:27:00 | -10.262 | 2.2413 | 53.5360 | 26.3984 | 0.1256 | 0.0203 | 26.5443 | -10.0 |
| 2 | ADM001 | 2019-01-08 | 2019-01-08 19:27:00 | -0.587 | 2.2755 | 56.0817 | 26.6102 | 0.1552 | 0.0347 | 26.8001 | 0.0 |
| 3 | ADM001 | 2019-02-06 | 2019-02-06 19:04:00 | -29.444 | 2.2109 | 52.4005 | 25.5163 | 0.1996 | 0.0000 | 25.7159 | -30.0 |
| 4 | ADM001 | 2019-02-06 | 2019-02-06 19:04:00 | -9.722 | 2.2208 | 51.9906 | 25.6077 | 0.1979 | 0.0000 | 25.8056 | -10.0 |
df1=pd.merge(left=dfSta,right=dfBot,how='right',
             left_on='Station',right_on='Station')
# a right join means all rows in the right table (dfBot) are included in the output
df1.head()
| | Station | Desig | Descrip | Basin | Max_Depth | Latitude | Longitude | Date | UTCDateTime | Z | PO4(uM)D | SiOH4(uM)D | NO3(uM)D | NO2(uM)D | NH4(uM)D | DIN | Znom |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | -30.119 | 2.2138 | 52.2913 | 26.2228 | 0.1347 | 0.0000 | 26.3575 | -30.0 |
| 1 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | -10.262 | 2.2413 | 53.5360 | 26.3984 | 0.1256 | 0.0203 | 26.5443 | -10.0 |
| 2 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | -0.587 | 2.2755 | 56.0817 | 26.6102 | 0.1552 | 0.0347 | 26.8001 | 0.0 |
| 3 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-02-06 | 2019-02-06 19:04:00 | -29.444 | 2.2109 | 52.4005 | 25.5163 | 0.1996 | 0.0000 | 25.7159 | -30.0 |
| 4 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-02-06 | 2019-02-06 19:04:00 | -9.722 | 2.2208 | 51.9906 | 25.6077 | 0.1979 | 0.0000 | 25.8056 | -10.0 |
# check that there are no stations without lat and lon:
df1.loc[pd.isnull(df1['Latitude'])]
| Station | Desig | Descrip | Basin | Max_Depth | Latitude | Longitude | Date | UTCDateTime | Z | PO4(uM)D | SiOH4(uM)D | NO3(uM)D | NO2(uM)D | NH4(uM)D | DIN | Znom |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|

(empty: no stations are missing coordinates)
# check that the match is one-to-one: df1 should have one row per bottle sample
len(df1),len(dfBot), len(dfSta)
(1171, 1171, 39)
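# a minimal sketch making the count check above explicit: the right join
# should preserve the bottle rows one-to-one
assert len(df1)==len(dfBot), 'right join changed the number of bottle rows'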
# where no time is provided, set time to midday Pacific time = ~ 20:00 UTC for now
# (most sampling takes place during the day)
# accurate times will be provided at a later date
# the code below takes advantage of all elements in 'Date' having a time component
# set to midnight
df1['UTCDateTime']=[iiD+dt.timedelta(hours=20) if pd.isnull(iiU)
                    else iiU for iiU,iiD in
                    zip(df1['UTCDateTime'],df1['Date'])]
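# an equivalent vectorized form (a sketch; assumes 'UTCDateTime' and 'Date' are
# datetime64 columns so that fillna and Timedelta apply):
# df1['UTCDateTime']=df1['UTCDateTime'].fillna(df1['Date']+pd.Timedelta(hours=20))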
# We require the following columns:
# dtUTC datetime
# Lat Latitude
# Lon Longitude
# Z Depth, increasing downward (positive)
df1.rename(columns={'UTCDateTime':'dtUTC','Latitude':'Lat','Longitude':'Lon'},inplace=True)
df1['Z']=-1*df1['Z']
df1.head()
| | Station | Desig | Descrip | Basin | Max_Depth | Lat | Lon | Date | dtUTC | Z | PO4(uM)D | SiOH4(uM)D | NO3(uM)D | NO2(uM)D | NH4(uM)D | DIN | Znom |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | 30.119 | 2.2138 | 52.2913 | 26.2228 | 0.1347 | 0.0000 | 26.3575 | -30.0 |
| 1 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | 10.262 | 2.2413 | 53.5360 | 26.3984 | 0.1256 | 0.0203 | 26.5443 | -10.0 |
| 2 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | 0.587 | 2.2755 | 56.0817 | 26.6102 | 0.1552 | 0.0347 | 26.8001 | 0.0 |
| 3 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-02-06 | 2019-02-06 19:04:00 | 29.444 | 2.2109 | 52.4005 | 25.5163 | 0.1996 | 0.0000 | 25.7159 | -30.0 |
| 4 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-02-06 | 2019-02-06 19:04:00 | 9.722 | 2.2208 | 51.9906 | 25.6077 | 0.1979 | 0.0000 | 25.8056 | -10.0 |
# It will also be useful to add/rename the following columns:
df1['NO23']=df1['NO3(uM)D']+df1['NO2(uM)D'] # the model does not distinguish between NO2 and NO3
df1['Amm']=df1['NH4(uM)D']
df1['Si']=df1['SiOH4(uM)D']
df1['Year']=[ii.year for ii in df1['dtUTC']]
df1['YD']=et.datetimeToYD(df1['dtUTC'])
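# note: with a datetime64 column, a pandas equivalent of et.datetimeToYD would be
# (a sketch, assuming both conventions number year days from 1):
# df1['YD']=pd.to_datetime(df1['dtUTC']).dt.dayofyear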
dfCTD0=pickle.load(open(os.path.join(datadir,f'Casts_{year}.p'),'rb'))
dfCTD=pd.merge(left=dfSta,right=dfCTD0,how='right',
               left_on='Station',right_on='Station')
dfCTD['dtUTC']=[iiD+dt.timedelta(hours=20) for iiD in dfCTD['Date']] # same caveat as the bottle data: exact cast times are not yet available, so assume ~20:00 UTC (midday Pacific)
dfCTD.rename(columns={'Latitude':'Lat','Longitude':'Lon'},inplace=True)
dfCTD['Z']=-1*dfCTD['Z']
# Calculate Absolute Salinity (g/kg) and Conservative Temperature (deg C) from
# Practical Salinity (psu) and in-situ Temperature (deg C):
press=gsw.p_from_z(-1*dfCTD['Z'],dfCTD['Lat'])
dfCTD['SA']=gsw.SA_from_SP(dfCTD['Salinity'],press,
                           dfCTD['Lon'],dfCTD['Lat'])
dfCTD['CT']=gsw.CT_from_t(dfCTD['SA'],dfCTD['Temperature'],press)
dfCTD['Year']=[ii.year for ii in dfCTD['dtUTC']]
dfCTD['YD']=et.datetimeToYD(dfCTD['dtUTC'])
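# quick sanity check of the TEOS-10 conversions on a single made-up sample
# (illustrative values only, not taken from the data set):
# p_chk=gsw.p_from_z(-10.,48.)                # pressure ~10 dbar at 10 m depth
# SA_chk=gsw.SA_from_SP(30.,p_chk,-123.,48.)  # Absolute Salinity slightly above SP
# CT_chk=gsw.CT_from_t(SA_chk,10.,p_chk)      # CT close to in-situ t near the surface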
dfCTD.keys()
Index(['Station', 'Desig', 'Descrip', 'Basin', 'Max_Depth', 'Lat', 'Lon', 'Salinity', 'Temperature', 'Sigma', 'DO', 'Turb', 'Z', 'Date', 'dtUTC', 'SA', 'CT', 'Year', 'YD'], dtype='object')
# check that there is never more than one ctd cast per station per day:
test=dfCTD.groupby(['Station','Year','YD','Z']).count()
print('this should be 1: ',test['Date'].unique())
this should be 1: [1]
start_date = dt.datetime(year,1,1)
end_date = dt.datetime(year,12,31)
flen=1 # number of days per model output file. always 1 for 201905 and 201812 model runs
namfmt='nowcast' # for 201905 and 201812 model runs, this should always be 'nowcast'
filemap={'vosaline':'grid_T','votemper':'grid_T','diatoms':'ptrc_T','ciliates':'ptrc_T','flagellates':'ptrc_T'}
fdict={'ptrc_T':1,'grid_T':1}
data_CTD=et.matchData(dfCTD,filemap,fdict,start_date,end_date,namfmt,PATH,flen,quiet=False);
(Lat,Lon)= 46.453155 -124.00960333333333 not matched to domain
(Lat,Lon)= 46.463155 -123.94126833333334 not matched to domain
(Lat,Lon)= 46.54537666666667 -123.98016166666666 not matched to domain
(Lat,Lon)= 46.644 -123.993 not matched to domain
(Lat,Lon)= 46.68676333333333 -123.9735 not matched to domain
(Lat,Lon)= 46.703986666666665 -123.837385 not matched to domain
(Lat,Lon)= 46.937313333333336 -123.91322333333333 not matched to domain
(Lat,Lon)= 46.953421666666664 -124.09295 not matched to domain
(Lat,Lon)= 47.21342666666666 -123.07765 not matched to domain
progress: 0.0%
progress: 9.350863084662715%
progress: 18.70172616932543%
progress: 28.052589253988142%
progress: 37.40345233865086%
progress: 46.75431542331357%
progress: 56.105178507976284%
progress: 65.456041592639%
progress: 74.80690467730172%
progress: 84.15776776196444%
progress: 93.50863084662714%
def interpCTDvar(sta,yr,yd,ztarget,ctdvar):
    # linearly interpolate a CTD variable to a target depth for a given
    # station, year, and year day
    ctdlocs=(dfCTD.Station==sta)&(dfCTD.Year==yr)&(dfCTD.YD==yd)
    if np.sum(ctdlocs)==0:
        print(f'Warning: Station {sta}, Year {yr}, year day {yd} not found in dfCTD')
        return np.nan
    else:
        # np.interp expects the depth coordinate to increase monotonically
        val=np.interp(ztarget,dfCTD.loc[ctdlocs,['Z']].values.flatten(),
                      dfCTD.loc[ctdlocs,[ctdvar]].values.flatten())
        return val
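# example call (a sketch; the station and year day are taken from the uniqueness
# checks below, and the 10 m target depth is arbitrary):
# interpCTDvar('PSS019',2019,8,10.0,'SA')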
dfCTD.loc[dfCTD.Station=='PSS019']['YD'].unique()
array([ 8, 37, 72, 92, 127, 157, 197, 225, 267, 303, 318])
df1.loc[df1.Station=='PSS019']['YD'].unique()
array([ 8, 37, 72, 92, 127, 157, 197, 225, 267, 303, 318])
df1['SA']=[interpCTDvar(sta,yr,yd,ztarget,'SA') for sta, yr, yd, ztarget
           in zip(df1['Station'],df1['Year'],df1['YD'],df1['Z'])]
Warning: Station GYS008, Year 2019, year day 120 not found in dfCTD
Warning: Station GYS008, Year 2019, year day 302 not found in dfCTD
Warning: Station WPA008, Year 2019, year day 87 not found in dfCTD
df1['CT']=[interpCTDvar(sta,yr,yd,ztarget,'CT') for sta, yr, yd, ztarget
           in zip(df1['Station'],df1['Year'],df1['YD'],df1['Z'])]
Warning: Station GYS008, Year 2019, year day 120 not found in dfCTD
Warning: Station GYS008, Year 2019, year day 302 not found in dfCTD
Warning: Station WPA008, Year 2019, year day 87 not found in dfCTD
# start_date and end_date are the first and last dates that will
# be included in the matched data set
start_date = dt.datetime(year,1,1)
end_date = dt.datetime(year,12,31)
flen=1 # number of days per model output file. always 1 for 201905 and 201812 model runs
namfmt='nowcast' # for 201905 and 201812 model runs, this should always be 'nowcast'
# filemap is a dictionary of the form variableName: fileType, where variableName is the
# name of the variable you want to extract and fileType designates the type of model
# output file it can be found in (usually ptrc_T for biology, grid_T for temperature
# and salinity)
filemap={'nitrate':'ptrc_T','silicon':'ptrc_T','ammonium':'ptrc_T','diatoms':'ptrc_T',
         'ciliates':'ptrc_T','flagellates':'ptrc_T','votemper':'grid_T','vosaline':'grid_T'}
# fdict is a dictionary mapping each file type to its time resolution: 1 means hourly
# output (1h files) and 24 means daily output (1d files). In certain runs, multiple
# time resolutions are available.
fdict={'ptrc_T':1,'grid_T':1}
# Note: to switch between 201812 and 201905 model results, change PATH
# to switch from hourly to daily model output, change fdict values from 1 to 24 (but daily
# files are not available for some runs and file types)
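# a sketch of the daily-output variant described above:
# fdict={'ptrc_T':24,'grid_T':24}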
data=et.matchData(df1,filemap,fdict,start_date,end_date,namfmt,PATH,flen,quiet=False);
(Lat,Lon)= 46.453155 -124.00960333333333 not matched to domain
(Lat,Lon)= 46.463155 -123.94126833333334 not matched to domain
(Lat,Lon)= 46.54537666666667 -123.98016166666666 not matched to domain
(Lat,Lon)= 46.644 -123.993 not matched to domain
(Lat,Lon)= 46.68676333333333 -123.9735 not matched to domain
(Lat,Lon)= 46.703986666666665 -123.837385 not matched to domain
(Lat,Lon)= 46.937313333333336 -123.91322333333333 not matched to domain
(Lat,Lon)= 46.953421666666664 -124.09295 not matched to domain
(Lat,Lon)= 47.21342666666666 -123.07765 not matched to domain
cm1=cmocean.cm.thermal
with nc.Dataset('/data/eolson/results/MEOPAR/NEMO-forcing-new/grid/bathymetry_201702.nc') as bathy:
    bathylon=np.copy(bathy.variables['nav_lon'][:,:])
    bathylat=np.copy(bathy.variables['nav_lat'][:,:])
    bathyZ=np.copy(bathy.variables['Bathymetry'][:,:])
##### Saving data as Pickle files to be used in the summary file
saveloc='/ocean/kflanaga/MEOPAR/savedData'
with open(os.path.join(saveloc,f'data_WADE_{modelversion}_{year}.pkl'),'wb') as hh:
    pickle.dump(data,hh)
with open(os.path.join(saveloc,f'data_CTD_{modelversion}_{year}.pkl'),'wb') as hh:
    pickle.dump(data_CTD,hh)
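# the summary file can then reload the matched data sets with (a sketch):
# with open(os.path.join(saveloc,f'data_WADE_{modelversion}_{year}.pkl'),'rb') as hh:
#     data=pickle.load(hh)
# with open(os.path.join(saveloc,f'data_CTD_{modelversion}_{year}.pkl'),'rb') as hh:
#     data_CTD=pickle.load(hh)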