This script exists only as a place to load all of the pickle files while I am busy doing other things.
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import netCDF4 as nc
import datetime as dt
from salishsea_tools import evaltools as et, viz_tools
import gsw
import matplotlib.gridspec as gridspec
import matplotlib as mpl
import matplotlib.dates as mdates
import scipy.interpolate as sinterp
import pickle
import cmocean
import json
import f90nml
from collections import OrderedDict
from matplotlib.colors import LogNorm
fs=16
mpl.rc('xtick', labelsize=fs)
mpl.rc('ytick', labelsize=fs)
mpl.rc('legend', fontsize=fs)
mpl.rc('axes', titlesize=fs)
mpl.rc('axes', labelsize=fs)
mpl.rc('figure', titlesize=fs)
mpl.rc('font', size=fs)
mpl.rc('font', family='sans-serif', weight='normal', style='normal')
import warnings
#warnings.filterwarnings('ignore')
from IPython.display import Markdown, display
%matplotlib inline
year=2019
modelversion='nowcast-green.201905'
PATH= '/results2/SalishSea/nowcast-green.201905/'
datadir='/ocean/eolson/MEOPAR/obs/WADE/ptools_data/ecology'
dfSta=pickle.load(open(os.path.join(datadir,'sta_df.p'),'rb'))
dfSta.head()
| Station | Desig | Descrip | Basin | Max_Depth | Latitude | Longitude |
|---|---|---|---|---|---|---|
| ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 |
| ADM002 | C | Admiralty Inlet (north) - Quimper Pn. | Admiralty Inlet | 79 | 48.187318 | -122.842950 |
| ADM003 | C | Admiralty Inlet (south) | Admiralty Inlet | 118 | 47.878983 | -122.483195 |
| BLL009 | C | Bellingham Bay - Pt. Frances | Strait of Georgia | 31 | 48.685940 | -122.599618 |
| BUD005 | C | Budd Inlet - Olympia Shoal | South Basin | 22 | 47.092040 | -122.918197 |
dfBot=pickle.load(open(os.path.join(datadir,f'Bottles_{year}.p'),'rb'))
dfBot.head()
| | Station | Date | UTCDateTime | Z | PO4(uM)D | SiOH4(uM)D | NO3(uM)D | NO2(uM)D | NH4(uM)D | DIN | Znom |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ADM001 | 2019-01-08 | 2019-01-08 19:27:00 | -30.119 | 2.2138 | 52.2913 | 26.2228 | 0.1347 | 0.0000 | 26.3575 | -30.0 |
| 1 | ADM001 | 2019-01-08 | 2019-01-08 19:27:00 | -10.262 | 2.2413 | 53.5360 | 26.3984 | 0.1256 | 0.0203 | 26.5443 | -10.0 |
| 2 | ADM001 | 2019-01-08 | 2019-01-08 19:27:00 | -0.587 | 2.2755 | 56.0817 | 26.6102 | 0.1552 | 0.0347 | 26.8001 | 0.0 |
| 3 | ADM001 | 2019-02-06 | 2019-02-06 19:04:00 | -29.444 | 2.2109 | 52.4005 | 25.5163 | 0.1996 | 0.0000 | 25.7159 | -30.0 |
| 4 | ADM001 | 2019-02-06 | 2019-02-06 19:04:00 | -9.722 | 2.2208 | 51.9906 | 25.6077 | 0.1979 | 0.0000 | 25.8056 | -10.0 |
df1=pd.merge(left=dfSta,right=dfBot,how='right',
             left_on='Station',right_on='Station')
# a right join means all rows in the right table (dfBot) are included in the output
df1.head()
| | Station | Desig | Descrip | Basin | Max_Depth | Latitude | Longitude | Date | UTCDateTime | Z | PO4(uM)D | SiOH4(uM)D | NO3(uM)D | NO2(uM)D | NH4(uM)D | DIN | Znom |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | -30.119 | 2.2138 | 52.2913 | 26.2228 | 0.1347 | 0.0000 | 26.3575 | -30.0 |
| 1 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | -10.262 | 2.2413 | 53.5360 | 26.3984 | 0.1256 | 0.0203 | 26.5443 | -10.0 |
| 2 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | -0.587 | 2.2755 | 56.0817 | 26.6102 | 0.1552 | 0.0347 | 26.8001 | 0.0 |
| 3 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-02-06 | 2019-02-06 19:04:00 | -29.444 | 2.2109 | 52.4005 | 25.5163 | 0.1996 | 0.0000 | 25.7159 | -30.0 |
| 4 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-02-06 | 2019-02-06 19:04:00 | -9.722 | 2.2208 | 51.9906 | 25.6077 | 0.1979 | 0.0000 | 25.8056 | -10.0 |
# check that there are no stations without lat and lon:
df1.loc[pd.isnull(df1['Latitude'])]
| Station | Desig | Descrip | Basin | Max_Depth | Latitude | Longitude | Date | UTCDateTime | Z | PO4(uM)D | SiOH4(uM)D | NO3(uM)D | NO2(uM)D | NH4(uM)D | DIN | Znom |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|

(empty: no stations are missing coordinates)
# check that the match is one-to-one: df1 should have one row per bottle sample
len(df1),len(dfBot), len(dfSta)
(1171, 1171, 39)
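# a minimal sketch making the count check above explicit: the right join
# should preserve the bottle rows one-to-one
assert len(df1)==len(dfBot), 'right join changed the number of bottle rows'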
# where no time is provided, set time to midday Pacific time = ~ 20:00 UTC for now
# (most sampling takes place during the day)
# accurate times will be provided at a later date
# the code below takes advantage of all elements in 'Date' having a time component
# set to midnight
df1['UTCDateTime']=[iiD+dt.timedelta(hours=20) if pd.isnull(iiU)
                    else iiU for iiU,iiD in
                    zip(df1['UTCDateTime'],df1['Date'])]
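# an equivalent vectorized form (a sketch; assumes 'UTCDateTime' and 'Date' are
# datetime64 columns so that fillna and Timedelta apply):
# df1['UTCDateTime']=df1['UTCDateTime'].fillna(df1['Date']+pd.Timedelta(hours=20))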
# We require the following columns:
# dtUTC datetime
# Lat Latitude
# Lon Longitude
# Z Depth, increasing downward (positive)
df1.rename(columns={'UTCDateTime':'dtUTC','Latitude':'Lat','Longitude':'Lon'},inplace=True)
df1['Z']=-1*df1['Z']
df1.head()
| | Station | Desig | Descrip | Basin | Max_Depth | Lat | Lon | Date | dtUTC | Z | PO4(uM)D | SiOH4(uM)D | NO3(uM)D | NO2(uM)D | NH4(uM)D | DIN | Znom |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | 30.119 | 2.2138 | 52.2913 | 26.2228 | 0.1347 | 0.0000 | 26.3575 | -30.0 |
| 1 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | 10.262 | 2.2413 | 53.5360 | 26.3984 | 0.1256 | 0.0203 | 26.5443 | -10.0 |
| 2 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-01-08 | 2019-01-08 19:27:00 | 0.587 | 2.2755 | 56.0817 | 26.6102 | 0.1552 | 0.0347 | 26.8001 | 0.0 |
| 3 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-02-06 | 2019-02-06 19:04:00 | 29.444 | 2.2109 | 52.4005 | 25.5163 | 0.1996 | 0.0000 | 25.7159 | -30.0 |
| 4 | ADM001 | C | Admiralty Inlet - Bush Pt. | Admiralty Inlet | 114 | 48.029813 | -122.617933 | 2019-02-06 | 2019-02-06 19:04:00 | 9.722 | 2.2208 | 51.9906 | 25.6077 | 0.1979 | 0.0000 | 25.8056 | -10.0 |
# It will also be useful to add/rename the following columns:
df1['NO23']=df1['NO3(uM)D']+df1['NO2(uM)D'] # the model does not distinguish between NO2 and NO3
df1['Amm']=df1['NH4(uM)D']
df1['Si']=df1['SiOH4(uM)D']
df1['Year']=[ii.year for ii in df1['dtUTC']]
df1['YD']=et.datetimeToYD(df1['dtUTC'])
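# note: with a datetime64 column, a pandas equivalent of et.datetimeToYD would be
# (a sketch, assuming both conventions number year days from 1):
# df1['YD']=pd.to_datetime(df1['dtUTC']).dt.dayofyear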
dfCTD0=pickle.load(open(os.path.join(datadir,f'Casts_{year}.p'),'rb'))
dfCTD=pd.merge(left=dfSta,right=dfCTD0,how='right',
               left_on='Station',right_on='Station')
dfCTD['dtUTC']=[iiD+dt.timedelta(hours=20) for iiD in dfCTD['Date']] # same caveat as the bottle data: exact cast times are not yet available, so assume ~20:00 UTC (midday Pacific)
dfCTD.rename(columns={'Latitude':'Lat','Longitude':'Lon'},inplace=True)
dfCTD['Z']=-1*dfCTD['Z']
# Calculate Absolute Salinity (g/kg) and Conservative Temperature (deg C) from
# Practical Salinity (psu) and in-situ Temperature (deg C):
press=gsw.p_from_z(-1*dfCTD['Z'],dfCTD['Lat'])
dfCTD['SA']=gsw.SA_from_SP(dfCTD['Salinity'],press,
                           dfCTD['Lon'],dfCTD['Lat'])
dfCTD['CT']=gsw.CT_from_t(dfCTD['SA'],dfCTD['Temperature'],press)
dfCTD['Year']=[ii.year for ii in dfCTD['dtUTC']]
dfCTD['YD']=et.datetimeToYD(dfCTD['dtUTC'])
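# quick sanity check of the TEOS-10 conversions on a single made-up sample
# (illustrative values only, not taken from the data set):
# p_chk=gsw.p_from_z(-10.,48.)                # pressure ~10 dbar at 10 m depth
# SA_chk=gsw.SA_from_SP(30.,p_chk,-123.,48.)  # Absolute Salinity slightly above SP
# CT_chk=gsw.CT_from_t(SA_chk,10.,p_chk)      # CT close to in-situ t near the surface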
dfCTD.keys()
Index(['Station', 'Desig', 'Descrip', 'Basin', 'Max_Depth', 'Lat', 'Lon', 'Salinity', 'Temperature', 'Sigma', 'DO', 'Turb', 'Z', 'Date', 'dtUTC', 'SA', 'CT', 'Year', 'YD'], dtype='object')
# check that there is never more than one ctd cast per station per day:
test=dfCTD.groupby(['Station','Year','YD','Z']).count()
print('this should be 1: ',test['Date'].unique())
this should be 1: [1]
start_date = dt.datetime(year,1,1)
end_date = dt.datetime(year,12,31)
flen=1 # number of days per model output file. always 1 for 201905 and 201812 model runs
namfmt='nowcast' # for 201905 and 201812 model runs, this should always be 'nowcast'
filemap={'vosaline':'grid_T','votemper':'grid_T','diatoms':'ptrc_T','ciliates':'ptrc_T','flagellates':'ptrc_T'}
fdict={'ptrc_T':1,'grid_T':1}
data_CTD=et.matchData(dfCTD,filemap,fdict,start_date,end_date,namfmt,PATH,flen,quiet=False);
(Lat,Lon)= 46.453155 -124.00960333333333 not matched to domain
(Lat,Lon)= 46.463155 -123.94126833333334 not matched to domain
(Lat,Lon)= 46.54537666666667 -123.98016166666666 not matched to domain
(Lat,Lon)= 46.644 -123.993 not matched to domain
(Lat,Lon)= 46.68676333333333 -123.9735 not matched to domain
(Lat,Lon)= 46.703986666666665 -123.837385 not matched to domain
(Lat,Lon)= 46.937313333333336 -123.91322333333333 not matched to domain
(Lat,Lon)= 46.953421666666664 -124.09295 not matched to domain
(Lat,Lon)= 47.21342666666666 -123.07765 not matched to domain
progress: 0.0%
progress: 9.350863084662715%
progress: 18.70172616932543%
progress: 28.052589253988142%
progress: 37.40345233865086%
progress: 46.75431542331357%
progress: 56.105178507976284%
progress: 65.456041592639%
progress: 74.80690467730172%
progress: 84.15776776196444%
progress: 93.50863084662714%
def interpCTDvar(sta,yr,yd,ztarget,ctdvar):
    # linearly interpolate a CTD variable to a target depth for a given
    # station, year, and year day
    ctdlocs=(dfCTD.Station==sta)&(dfCTD.Year==yr)&(dfCTD.YD==yd)
    if np.sum(ctdlocs)==0:
        print(f'Warning: Station {sta}, Year {yr}, year day {yd} not found in dfCTD')
        return np.nan
    else:
        # np.interp expects the depth coordinate to increase monotonically
        val=np.interp(ztarget,dfCTD.loc[ctdlocs,['Z']].values.flatten(),
                      dfCTD.loc[ctdlocs,[ctdvar]].values.flatten())
        return val
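# example call (a sketch; the station and year day are taken from the uniqueness
# checks below, and the 10 m target depth is arbitrary):
# interpCTDvar('PSS019',2019,8,10.0,'SA')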
dfCTD.loc[dfCTD.Station=='PSS019']['YD'].unique()
array([ 8, 37, 72, 92, 127, 157, 197, 225, 267, 303, 318])
df1.loc[df1.Station=='PSS019']['YD'].unique()
array([ 8, 37, 72, 92, 127, 157, 197, 225, 267, 303, 318])
df1['SA']=[interpCTDvar(sta,yr,yd,ztarget,'SA') for sta, yr, yd, ztarget
           in zip(df1['Station'],df1['Year'],df1['YD'],df1['Z'])]
Warning: Station GYS008, Year 2019, year day 120 not found in dfCTD
Warning: Station GYS008, Year 2019, year day 302 not found in dfCTD
Warning: Station WPA008, Year 2019, year day 87 not found in dfCTD
df1['CT']=[interpCTDvar(sta,yr,yd,ztarget,'CT') for sta, yr, yd, ztarget
           in zip(df1['Station'],df1['Year'],df1['YD'],df1['Z'])]
Warning: Station GYS008, Year 2019, year day 120 not found in dfCTD
Warning: Station GYS008, Year 2019, year day 302 not found in dfCTD
Warning: Station WPA008, Year 2019, year day 87 not found in dfCTD
# start_date and end_date are the first and last dates that will
# be included in the matched data set
start_date = dt.datetime(year,1,1)
end_date = dt.datetime(year,12,31)
flen=1 # number of days per model output file. always 1 for 201905 and 201812 model runs
namfmt='nowcast' # for 201905 and 201812 model runs, this should always be 'nowcast'
# filemap is a dictionary of the form variableName: fileType, where variableName is the
# name of the variable you want to extract and fileType designates the type of model
# output file it can be found in (usually ptrc_T for biology, grid_T for temperature
# and salinity)
filemap={'nitrate':'ptrc_T','silicon':'ptrc_T','ammonium':'ptrc_T','diatoms':'ptrc_T',
         'ciliates':'ptrc_T','flagellates':'ptrc_T','votemper':'grid_T','vosaline':'grid_T'}
# fdict is a dictionary mapping each file type to its time resolution: 1 means hourly
# output (1h files) and 24 means daily output (1d files). In certain runs, multiple
# time resolutions are available.
fdict={'ptrc_T':1,'grid_T':1}
# Note: to switch between 201812 and 201905 model results, change PATH
# to switch from hourly to daily model output, change fdict values from 1 to 24 (but daily
# files are not available for some runs and file types)
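# a sketch of the daily-output variant described above:
# fdict={'ptrc_T':24,'grid_T':24}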
data=et.matchData(df1,filemap,fdict,start_date,end_date,namfmt,PATH,flen,quiet=False);
(Lat,Lon)= 46.453155 -124.00960333333333 not matched to domain
(Lat,Lon)= 46.463155 -123.94126833333334 not matched to domain
(Lat,Lon)= 46.54537666666667 -123.98016166666666 not matched to domain
(Lat,Lon)= 46.644 -123.993 not matched to domain
(Lat,Lon)= 46.68676333333333 -123.9735 not matched to domain
(Lat,Lon)= 46.703986666666665 -123.837385 not matched to domain
(Lat,Lon)= 46.937313333333336 -123.91322333333333 not matched to domain
(Lat,Lon)= 46.953421666666664 -124.09295 not matched to domain
(Lat,Lon)= 47.21342666666666 -123.07765 not matched to domain
cm1=cmocean.cm.thermal
with nc.Dataset('/data/eolson/results/MEOPAR/NEMO-forcing-new/grid/bathymetry_201702.nc') as bathy:
    bathylon=np.copy(bathy.variables['nav_lon'][:,:])
    bathylat=np.copy(bathy.variables['nav_lat'][:,:])
    bathyZ=np.copy(bathy.variables['Bathymetry'][:,:])
##### Saving data as Pickle files to be used in the summary file
saveloc='/ocean/kflanaga/MEOPAR/savedData'
with open(os.path.join(saveloc,f'data_WADE_{modelversion}_{year}.pkl'),'wb') as hh:
    pickle.dump(data,hh)
with open(os.path.join(saveloc,f'data_CTD_{modelversion}_{year}.pkl'),'wb') as hh:
    pickle.dump(data_CTD,hh)
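# the summary file can then reload the matched data sets with (a sketch):
# with open(os.path.join(saveloc,f'data_WADE_{modelversion}_{year}.pkl'),'rb') as hh:
#     data=pickle.load(hh)
# with open(os.path.join(saveloc,f'data_CTD_{modelversion}_{year}.pkl'),'rb') as hh:
#     data_CTD=pickle.load(hh)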