Time series of spring phytoplankton bloom and model forcing at station S3 from Feb 15th - June 15th 2015¶

In [1]:

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib as mpl
import netCDF4 as nc
import datetime as dt
from salishsea_tools import evaltools as et, places, viz_tools, visualisations
import xarray as xr
import pandas as pd
import pickle
import os

%matplotlib inline

In [2]:

start=dt.datetime(2015,2,15)
end=dt.datetime(2015,6,15)
year=str(start.year)
modver='201812'

Location of station S3¶

In [3]:

loc='S3'

# lat and lon informatin for place:
lon,lat=places.PLACES['S3']['lon lat']
# get place information on SalishSeaCast grid:
ij,ii=places.PLACES['S3']['NEMO grid ji']
# GEM2.5 grid ji is atm forcing grid for ops files
jw,iw=places.PLACES['S3']['GEM2.5 grid ji']

fig, ax = plt.subplots(1,1,figsize = (6,6))
with nc.Dataset('/data/vdo/MEOPAR/NEMO-forcing/grid/bathymetry_201702.nc') as grid:
    viz_tools.plot_coastline(ax, grid, coords ='map', isobath=.1)
ax.plot(lon, lat, '.', markersize=14, color='red')
ax.set_ylim(48,50)
ax.set_xlim(-125,-122)
ax.set_title('Location of Station S3')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

Out[3]:

Text(0, 0.5, 'Latitude')

load data¶

In [4]:

savedir='/ocean/aisabell/MEOPAR/extracted_files'
#savedir='/data/eolson/results/MEOPAR'
fname=f'springTimeSeries_{year}_{loc}_{modver}.pkl'
savepath=os.path.join(savedir,fname)
recalc=False

In [5]:

if recalc==True or not os.path.isfile(savepath):
    basedir='/results/SalishSea/nowcast-green.201812/'
    nam_fmt='nowcast'
    flen=1 # files contain 1 day of data each
    ftype= 'ptrc_T' # load bio files
    tres=24 # 1: hourly resolution; 24: daily resolution  
    flist=et.index_model_files(start,end,basedir,nam_fmt,flen,ftype,tres)
    # flist contains paths: file pathes; t_0 timestemp of start of each file; t_n: timestamp of start of next file
    # a list of the files we want between start and end date
    print(flist)
    fliste3t = et.index_model_files(start,end,basedir,nam_fmt,flen,"carp_T",tres)

    ik=0
    with xr.open_mfdataset(flist['paths']) as bio:
        bio_time=np.array(bio.time_centered[:])
        sno3=np.array(bio.nitrate.isel(deptht=ik,y=ij,x=ii))
        sdiat=np.array(bio.diatoms.isel(deptht=ik,y=ij,x=ii))
        sflag=np.array(bio.flagellates.isel(deptht=ik,y=ij,x=ii))
        scili=np.array(bio.ciliates.isel(deptht=ik,y=ij,x=ii))
        no3_alld=np.array(bio.nitrate.isel(y=ij,x=ii)) 
        diat_alld=np.array(bio.diatoms.isel(y=ij,x=ii))
        flag_alld=np.array(bio.flagellates.isel(y=ij,x=ii))
        cili_alld=np.array(bio.ciliates.isel(y=ij,x=ii))
        with xr.open_mfdataset(fliste3t['paths']) as carp:
            intdiat=np.array(np.sum(bio.diatoms.isel(y=ij,x=ii)*carp.e3t.isel(y=ij,x=ii),1)) # depth integrated diatom
            intphyto=np.array(np.sum((bio.diatoms.isel(y=ij,x=ii)+bio.flagellates.isel(y=ij,x=ii)\
                            +bio.ciliates.isel(y=ij,x=ii))*carp.e3t.isel(y=ij,x=ii),1))
            spar=np.array(carp.PAR.isel(deptht=ik,y=ij,x=ii))
    fracdiat=intdiat/intphyto # depth integrated fraction of diatoms

    sphyto=sdiat+sflag+scili
    phyto_alld=diat_alld+flag_alld+cili_alld
    percdiat=sdiat/sphyto # percent diatoms

    opsdir='/results/forcing/atmospheric/GEM2.5/operational'

    flist2=et.index_model_files(start,end,opsdir,nam_fmt='ops',flen=1,ftype='None',tres=24)
    with xr.open_mfdataset(flist2['paths']) as winds:
        u_wind=np.array(winds.u_wind.isel(y=jw,x=iw))
        v_wind=np.array(winds.v_wind.isel(y=jw,x=iw))
        twind=np.array(winds.time_counter)
        solar=np.array(winds.solar.isel(y=jw,x=iw))
    # wind speed:
    wspeed=np.sqrt(u_wind**2 + v_wind**2)
    # wind direction in degrees from east
    d = np.arctan2(v_wind, u_wind)
    winddirec=np.rad2deg(d + (d < 0)*2*np.pi)

    # reading Fraser river flow files
    dfFra=pd.read_csv('/ocean/eolson/MEOPAR/obs/ECRivers/Flow/FraserHopeDaily__Dec-2-2020_10_31_05PM.csv',
                      skiprows=1)
    # the original file contains both flow and water level information in the same field (Value)
    # keep only the flow data, where PARAM=1 (drop PARAM=2 values, water level data)
    # flow units are m3/s
    # DD is YD, year day (ie. 1 is jan 1)
    dfFra.drop(dfFra.loc[dfFra.PARAM==2].index,inplace=True)  

    # rename 'Value' column to 'Flow' now that we have removed all the water level rows
    dfFra.rename(columns={'Value':'Flow'}, inplace=True) 
        # inplace=True does this function on the orginal dataframe

    # no time information so use dt.date
    dfFra['Date']=[dt.date(iyr,1,1)+dt.timedelta(days=idd-1) for iyr, idd in zip(dfFra['YEAR'],dfFra['DD'])]
    # taking the value from the yr column, jan1st date, and making jan1 column to be 1 not 0
    dfFra.head(2)

    # select portion of dataframe in desired date range
    dfFra2=dfFra.loc[(dfFra.Date>=start.date())&(dfFra.Date<=end.date())]
    riv_time=dfFra2['Date'].values
    rivFlow=dfFra2['Flow'].values
    # could also write dfFra['Date'], sometimes this is required
    # newstart is a datetime object, so we convert it to just a date with .date
    pickle.dump((bio_time,sno3,sdiat,sflag,scili,diat_alld,no3_alld,flag_alld,cili_alld,phyto_alld,intdiat,intphyto,spar,fracdiat,sphyto,percdiat,
            u_wind,v_wind,twind,solar,wspeed,winddirec,riv_time,rivFlow),open(savepath,'wb'))
else:
    bio_time,sno3,sdiat,sflag,scili,diat_alld,no3_alld,flag_alld,cili_alld,phyto_alld,intdiat,intphyto,spar,fracdiat,sphyto,percdiat,\
            u_wind,v_wind,twind,solar,wspeed,winddirec,riv_time,rivFlow=pickle.load(open(savepath,'rb'))

Total surface phytoplankton and nitrate:¶

In [6]:

fig,ax=plt.subplots(1,1,figsize=(12,3))
p1=ax.plot(bio_time,sphyto,
           '-',color='forestgreen',label='Phytoplankton')
p2=ax.plot(bio_time,sno3,
           '-',color='orange',label='Nitrate')
ax.legend(handles=[p1[0],p2[0]],loc=1)
ax.set_ylabel('Concentration ($\mu$M N)')
ax.set_title('Surface Phytoplankton and Nitrate at Station S3')

Out[6]:

Text(0.5, 1.0, 'Surface Phytoplankton and Nitrate at Station S3')

Spring Bloom Timing Metrics (3 ways)¶

Metric 1: Allen and Wolfe definition: “peak phytoplankton concentration (averaged from the surface to 3 m depth) within four days of the average 0-3 m nitrate concentration going below 0.5 uM (the half-saturation concentration) for two consecutive days”¶

a) Average phytoplankton concentration over upper 3 m

b) Average nitrate over upper 3 m

c) Find first location where nitrate crosses below 0.5 micromolar and stays there for 2 days

d) Find date with maximum phytoplankton concentration within four days (say 9 day window) of date in c).

Metric 2: the first peak in which chlorophyll concentrations are above 5 ug/L for more than two days (Olson et. al 2020)¶

Take first peak, check if it has two days around have concentrations>5, if no, move to next peak

In [9]:

# A dataframe that contains the variables needed to determine the timing of the spring bloom for metric 2 & 3
df = pd.DataFrame({'bio_time':bio_time, 'sphyto':sphyto, 'sno3':sno3})
df

Out[9]:

	bio_time	sphyto	sno3
0	2015-02-15 12:00:00	1.255212	17.625977
1	2015-02-16 12:00:00	1.320988	17.007074
2	2015-02-17 12:00:00	1.458417	15.910181
3	2015-02-18 12:00:00	1.579880	15.771015
4	2015-02-19 12:00:00	1.729280	16.093336
...	...	...	...
116	2015-06-11 12:00:00	3.340473	0.654803
117	2015-06-12 12:00:00	3.568457	0.687857
118	2015-06-13 12:00:00	4.321714	0.978154
119	2015-06-14 12:00:00	3.935845	1.039611
120	2015-06-15 12:00:00	4.036834	0.768613

121 rows × 3 columns

In [11]:

df.sphyto[(df.sphyto.shift(1) < df.sphyto) & (df.sphyto.shift(-1) < df.sphyto)]

Out[11]:

4      1.729280
9      2.591403
17     6.799445
19     7.249289
25     4.688278
27     4.428394
32     3.556840
36     3.740952
41     5.728502
45     6.933609
52     2.370629
55     2.248283
59     4.077229
63     2.983586
67     2.449699
73     4.258922
77     6.421648
82     2.535459
85     1.446307
92     3.555163
101    3.418709
107    1.573672
111    2.291829
118    4.321714
Name: sphyto, dtype: float32

In [10]:

# to find all the peaks
df['phytopeaks'] = df.sphyto[(df.sphyto.shift(1) < df.sphyto) & (df.sphyto.shift(-1) < df.sphyto)]
df

Out[10]:

	bio_time	sphyto	sno3	phytopeaks
0	2015-02-15 12:00:00	1.255212	17.625977	NaN
1	2015-02-16 12:00:00	1.320988	17.007074	NaN
2	2015-02-17 12:00:00	1.458417	15.910181	NaN
3	2015-02-18 12:00:00	1.579880	15.771015	NaN
4	2015-02-19 12:00:00	1.729280	16.093336	1.729280
...	...	...	...	...
116	2015-06-11 12:00:00	3.340473	0.654803	NaN
117	2015-06-12 12:00:00	3.568457	0.687857	NaN
118	2015-06-13 12:00:00	4.321714	0.978154	4.321714
119	2015-06-14 12:00:00	3.935845	1.039611	NaN
120	2015-06-15 12:00:00	4.036834	0.768613	NaN

121 rows × 4 columns

In [11]:

df.loc[df.bio_time>dt.datetime(2015,3,28)].head(12)

Out[11]:

	bio_time	sphyto	sno3	phytopeaks
41	2015-03-28 12:00:00	5.728502	5.056458	5.728502
42	2015-03-29 12:00:00	5.104787	4.266105	NaN
43	2015-03-30 12:00:00	5.648978	4.286055	NaN
44	2015-03-31 12:00:00	6.748889	4.585369	NaN
45	2015-04-01 12:00:00	6.933609	1.795769	6.933609
46	2015-04-02 12:00:00	5.489853	0.169281	NaN
47	2015-04-03 12:00:00	4.764010	0.043544	NaN
48	2015-04-04 12:00:00	4.081254	0.226728	NaN
49	2015-04-05 12:00:00	3.674427	0.649200	NaN
50	2015-04-06 12:00:00	3.063072	0.378349	NaN
51	2015-04-07 12:00:00	2.266260	0.095489	NaN
52	2015-04-08 12:00:00	2.370629	0.198871	2.370629

In [12]:

i=45
df['sphyto'].iloc[i-1]>5 and df['sphyto'].iloc[i-2]>5 and pd.notna(df['phytopeaks'].iloc[i])

Out[12]:

True

In [13]:

# extract the bloom time date
for i, row in df.iterrows():
    if df['sphyto'].iloc[i-1]>5 and df['sphyto'].iloc[i-2]>5 and pd.notna(df['phytopeaks'].iloc[i]):
        bloomtime2=df.bio_time[i]
        break
    elif df['sphyto'].iloc[i+1]>5 and df['sphyto'].iloc[i+2]>5 and pd.notna(df['phytopeaks'].iloc[i]):
        bloomtime2=df.bio_time[i]
        break
# i dont undestand? this was working before???
bloomtime2

Out[13]:

Timestamp('2015-03-04 12:00:00')

In [ ]:

for i, row in df.iterrows():
    try:
        if df['sphyto'].iloc[i-1]>5 and df['sphyto'].iloc[i-2]>5 and pd.notna(df['phytopeaks'].iloc[i]):
            bloomtime2=df.bio_time[i]
            break
        elif df['sphyto'].iloc[i+1]>5 and df['sphyto'].iloc[i+2]>5 and pd.notna(df['phytopeaks'].iloc[i]):
            bloomtime2=df.bio_time[i]
            break
    except IndexError:
        bloomtime2=np.nan
        print('bloom not found')

In [18]:

df.tail()

Out[18]:

	bio_time	sphyto	sno3	phytopeaks
116	2015-06-11 12:00:00	3.340473	0.654803	NaN
117	2015-06-12 12:00:00	3.568457	0.687857	NaN
118	2015-06-13 12:00:00	4.321714	0.978154	4.321714
119	2015-06-14 12:00:00	3.935845	1.039611	NaN
120	2015-06-15 12:00:00	4.036834	0.768613	NaN

In [20]:

i=120
try:
    df['sphyto'].iloc[i+1]
except IndexError:
    print('not found')

not found

Metric 3: For a given year, bloom initiation is determined to be the week that first reaches the threshold value (by looking at weekly averages) as long as one of the two following weeks was >70% of the threshold value. (Karyn’s method)¶

The median + 5% of the annual Chl concentration is deemed “threshold value” for each year.

In [ ]:

# 1) determine threshold value
    # a) find median chl value of that year
    # b) add 5%
    # c) secondthresh = find 70% of threshold value

threshold=df['sphyto'].median()*1.05
secondthresh=threshold*0.7
  
# 2) Take the average of each week and make a dataframe with start date of week and weekly average
weeklychl = pd.DataFrame(df.resample('W', on='bio_time').sphyto.mean())
weeklychl.reset_index(inplace=True)

# 3) Loop through the weeks and find the first week that reaches the threshold. Is one of the two week values after this week > secondthresh? 
        # yes? that is the bloomtime
        # no? go to next week
        
for i, row in weeklychl.iterrows():
    if weeklychl['sphyto'].iloc[i]>threshold and weeklychl['sphyto'].iloc[i+1]>secondthresh:
        bloomtime3=df.bio_time[i]
    elif weeklychl['sphyto'].iloc[i]>threshold and weeklychl['sphyto'].iloc[i+2]>secondthresh:
        bloomtime3=df.bio_time[i]
    break
bloomtime3

Fraction of surface phytoplankton that is diatoms¶

In [ ]:

fig,ax=plt.subplots(1,1,figsize=(12,3))
ax.plot(bio_time,percdiat, '-',color='orchid')
ax.set_ylabel('Diatoms / Total Phytoplankton')
ax.set_title('Fraction of Diatoms in Total Surface Phytoplankton')
ax.set_ylim(0,1)

Depth integrated phytoplankton:¶

In [ ]:

%%time
fig,ax=plt.subplots(1,1,figsize=(12,3))
ax.plot(bio_time,intphyto,'-',color='forestgreen',label='Phytoplankton')
ax.legend(loc=2);
ax.set_ylabel('Concentration (mmol N/m2)')
ax.set_xlim(bio_time[0],bio_time[-1])
ax.set_title('Depth Integrated Phytoplankton Concentration')

Fraction of depth integrated phytoplankton that is diatoms¶

In [ ]:

%%time
fig,ax=plt.subplots(1,1,figsize=(12,3))
ax.plot(bio_time,fracdiat,'-',color='orchid')
ax.set_ylabel('Diatoms / Total Phytoplankton')
ax.set_xlim(bio_time[0],bio_time[-1])
ax.set_title('Fraction of Diatoms in Total Depth Integrated Phytoplankton')
ax.set_ylim(0,1)

Fraser River Flow¶

In [ ]:

# plot phytoplankton on top:
fig,ax1=plt.subplots(1,1,figsize=(12,3))
p1=ax1.plot(bio_time,sphyto,
           '-',color='forestgreen',label='Phytoplankton')
p2=ax1.plot(bio_time,sno3,
           '-',color='orange',label='Nitrate')
ax1.set_ylabel('Concentration ($\mu$M N)')
ax1.set_ylim(0,18)

# Now plot Fraser Flow
ax2=ax1.twinx()
p3=ax2.plot(riv_time,rivFlow,'c-', label='Fraser Flow')
ax2.set_ylabel('Flow (m$^3$s$^{-1}$)')
ax2.set_title('Fraser Flow at Hope and Surface Phytoplankton at Station S3')
ax1.legend(handles=[p1[0],p2[0],p3[0]],loc='upper center')

Forcing (ops): Wind Speed¶

In [ ]:

fig,ax=plt.subplots(1,1,figsize=(18,2))
ax.plot(twind,u_wind,'c-')
ax.plot(twind,v_wind,'b-')
ax.set_xlim(start,end)
ax.set_title('Wind speed')
ax.set_ylabel('m/s')

In [ ]:

fig,ax=plt.subplots(1,1,figsize=(20,6))
q=ax.quiver(twind, np.zeros(len(twind)), u_wind, v_wind,scale=200, width=0.001); # change the scale
ax.set_yticklabels([]);
fig.autofmt_xdate(bottom=0.3, rotation=30, ha='right')
yearsFmt = mdates.DateFormatter('%b %d')
ax.xaxis.set_major_formatter(yearsFmt)
ax.set_xlim(start,end)
ax.set_title('Wind Vectors in Geographic Coordinates')
# this can probably be done better?

Daily average wind speed¶

In [ ]:

# calculate daily average wind speed:
ttday=twind[12::24] # start at 12th value and take every 24th
wsdaily=list()
for ii in range(0,int(len(wspeed)/24)):
    wsdaily.append(np.mean(wspeed[(ii*24):((ii+1)*24)]))
wsdaily=np.array(wsdaily) # convert to numpy array from list to be able to plot

In [ ]:

fig,ax=plt.subplots(1,1,figsize=(18,2))
ax.plot(ttday,wsdaily,'b-')
ax.set_xlim(start,end)
ax.set_title('Daily average wind speed')
ax.set_ylabel('m/s')

Daily average wind speed cubed¶

In [ ]:

wscubed=wsdaily**3

# plot phytoplankton on top:
fig,ax1=plt.subplots(1,1,figsize=(12,3))
p1=ax1.plot(bio_time,sphyto,
           '-',color='forestgreen',label='Phytoplankton')
p2=ax1.plot(bio_time,sno3,
           '-',color='orange',label='Nitrate')
ax1.set_ylabel('Concentration ($\mu$M N)')
ax1.set_ylim(0,18)

ax2=ax1.twinx()
p3=ax2.plot(ttday,wscubed,'b-',label='Wind Speed Cubed')
ax2.set_xlim(start,end)
ax2.set_title('Daily Average Wind Speed cubed and Surface Phytoplankton at Station S3')
ax2.set_ylabel('$\mathregular{m^3}$/$\mathregular{s^3}$')
ax1.legend(handles=[p1[0],p2[0],p3[0]],loc='upper center')

Photosynthetically Available Radiation (PAR) at Surface¶

In [ ]:

# plot phytoplankton on top:
fig,ax1=plt.subplots(1,1,figsize=(12,3))
p1=ax1.plot(bio_time,sphyto,
           '-',color='forestgreen',label='Phytoplankton')
p2=ax1.plot(bio_time,sno3,
           '-',color='orange',label='Nitrate')
ax1.set_ylabel('Concentration ($\mu$M N)')
ax1.set_ylim(0,18)

ax2=ax1.twinx()
p3=ax2.plot(bio_time,spar,
           '-',color='red',label='Model PAR')
ax2.set_ylabel('PAR (W/$\mathregular{m^2}$)') # say its model PAR
ax2.set_title('Modeled PAR and Surface Phytoplankton at Station S3')
ax1.legend(handles=[p1[0],p2[0],p3[0]],loc='center left')

Forcing: Solar radiation¶

In [ ]:

fig,ax=plt.subplots(1,1,figsize=(18,2))
ax.plot(twind,solar,'r-')
ax.set_xlim(start,end)
ax.set_title('Solar radiation')
ax.set_ylabel('W/$\mathregular{m^2}$')

In [ ]:

# calculate daily average solar radiation:
ttday=twind[12::24] # start at 12th value and take every 24th
solardaily=list()
for ii in range(0,int(len(solar)/24)):
    solardaily.append(np.mean(solar[(ii*24):((ii+1)*24)]))
solardaily=np.array(solardaily) # convert to numpy array from list to be able to plot

In [ ]:

# plot phytoplankton on top:
fig,ax1=plt.subplots(1,1,figsize=(12,3))
p1=ax1.plot(bio_time,sphyto,
           '-',color='forestgreen',label='Phytoplankton')
p2=ax1.plot(bio_time,sno3,
           '-',color='orange',label='Nitrate')
ax1.set_ylabel('Concentration ($\mu$M N)')
ax1.set_ylim(0,18)

ax2=ax1.twinx()
p3=ax2.plot(ttday,solardaily,'m-',label='Solar Radiation')
ax2.set_xlim(start,end)
ax2.set_title('Daily Average Solar Radiation and Surface Phytoplankton at Station S3')
ax2.set_ylabel('W/$\mathregular{m^2}$')
ax1.legend(handles=[p1[0],p2[0],p3[0]],loc='upper center')

In [ ]: