import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib as mpl
import netCDF4 as nc
import datetime as dt
from salishsea_tools import evaltools as et, places, viz_tools, visualisations
import xarray as xr
import pandas as pd
import pickle
import os
%matplotlib inline
start=dt.datetime(2015,2,15)
end=dt.datetime(2015,6,15)
year=str(start.year)
modver='201812'
loc='S3'
# lat and lon informatin for place:
lon,lat=places.PLACES['S3']['lon lat']
# get place information on SalishSeaCast grid:
ij,ii=places.PLACES['S3']['NEMO grid ji']
# GEM2.5 grid ji is atm forcing grid for ops files
jw,iw=places.PLACES['S3']['GEM2.5 grid ji']
fig, ax = plt.subplots(1,1,figsize = (6,6))
with nc.Dataset('/data/vdo/MEOPAR/NEMO-forcing/grid/bathymetry_201702.nc') as grid:
viz_tools.plot_coastline(ax, grid, coords ='map', isobath=.1)
ax.plot(lon, lat, '.', markersize=14, color='red')
ax.set_ylim(48,50)
ax.set_xlim(-125,-122)
ax.set_title('Location of Station S3')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
Text(0, 0.5, 'Latitude')
savedir='/ocean/aisabell/MEOPAR/extracted_files'
#savedir='/data/eolson/results/MEOPAR'
fname=f'springTimeSeries_{year}_{loc}_{modver}.pkl'
savepath=os.path.join(savedir,fname)
recalc=False
if recalc==True or not os.path.isfile(savepath):
basedir='/results/SalishSea/nowcast-green.201812/'
nam_fmt='nowcast'
flen=1 # files contain 1 day of data each
ftype= 'ptrc_T' # load bio files
tres=24 # 1: hourly resolution; 24: daily resolution
flist=et.index_model_files(start,end,basedir,nam_fmt,flen,ftype,tres)
# flist contains paths: file pathes; t_0 timestemp of start of each file; t_n: timestamp of start of next file
# a list of the files we want between start and end date
print(flist)
fliste3t = et.index_model_files(start,end,basedir,nam_fmt,flen,"carp_T",tres)
ik=0
with xr.open_mfdataset(flist['paths']) as bio:
bio_time=np.array(bio.time_centered[:])
sno3=np.array(bio.nitrate.isel(deptht=ik,y=ij,x=ii))
sdiat=np.array(bio.diatoms.isel(deptht=ik,y=ij,x=ii))
sflag=np.array(bio.flagellates.isel(deptht=ik,y=ij,x=ii))
scili=np.array(bio.ciliates.isel(deptht=ik,y=ij,x=ii))
no3_alld=np.array(bio.nitrate.isel(y=ij,x=ii))
diat_alld=np.array(bio.diatoms.isel(y=ij,x=ii))
flag_alld=np.array(bio.flagellates.isel(y=ij,x=ii))
cili_alld=np.array(bio.ciliates.isel(y=ij,x=ii))
with xr.open_mfdataset(fliste3t['paths']) as carp:
intdiat=np.array(np.sum(bio.diatoms.isel(y=ij,x=ii)*carp.e3t.isel(y=ij,x=ii),1)) # depth integrated diatom
intphyto=np.array(np.sum((bio.diatoms.isel(y=ij,x=ii)+bio.flagellates.isel(y=ij,x=ii)\
+bio.ciliates.isel(y=ij,x=ii))*carp.e3t.isel(y=ij,x=ii),1))
spar=np.array(carp.PAR.isel(deptht=ik,y=ij,x=ii))
fracdiat=intdiat/intphyto # depth integrated fraction of diatoms
sphyto=sdiat+sflag+scili
phyto_alld=diat_alld+flag_alld+cili_alld
percdiat=sdiat/sphyto # percent diatoms
opsdir='/results/forcing/atmospheric/GEM2.5/operational'
flist2=et.index_model_files(start,end,opsdir,nam_fmt='ops',flen=1,ftype='None',tres=24)
with xr.open_mfdataset(flist2['paths']) as winds:
u_wind=np.array(winds.u_wind.isel(y=jw,x=iw))
v_wind=np.array(winds.v_wind.isel(y=jw,x=iw))
twind=np.array(winds.time_counter)
solar=np.array(winds.solar.isel(y=jw,x=iw))
# wind speed:
wspeed=np.sqrt(u_wind**2 + v_wind**2)
# wind direction in degrees from east
d = np.arctan2(v_wind, u_wind)
winddirec=np.rad2deg(d + (d < 0)*2*np.pi)
# reading Fraser river flow files
dfFra=pd.read_csv('/ocean/eolson/MEOPAR/obs/ECRivers/Flow/FraserHopeDaily__Dec-2-2020_10_31_05PM.csv',
skiprows=1)
# the original file contains both flow and water level information in the same field (Value)
# keep only the flow data, where PARAM=1 (drop PARAM=2 values, water level data)
# flow units are m3/s
# DD is YD, year day (ie. 1 is jan 1)
dfFra.drop(dfFra.loc[dfFra.PARAM==2].index,inplace=True)
# rename 'Value' column to 'Flow' now that we have removed all the water level rows
dfFra.rename(columns={'Value':'Flow'}, inplace=True)
# inplace=True does this function on the orginal dataframe
# no time information so use dt.date
dfFra['Date']=[dt.date(iyr,1,1)+dt.timedelta(days=idd-1) for iyr, idd in zip(dfFra['YEAR'],dfFra['DD'])]
# taking the value from the yr column, jan1st date, and making jan1 column to be 1 not 0
dfFra.head(2)
# select portion of dataframe in desired date range
dfFra2=dfFra.loc[(dfFra.Date>=start.date())&(dfFra.Date<=end.date())]
riv_time=dfFra2['Date'].values
rivFlow=dfFra2['Flow'].values
# could also write dfFra['Date'], sometimes this is required
# newstart is a datetime object, so we convert it to just a date with .date
pickle.dump((bio_time,sno3,sdiat,sflag,scili,diat_alld,no3_alld,flag_alld,cili_alld,phyto_alld,intdiat,intphyto,spar,fracdiat,sphyto,percdiat,
u_wind,v_wind,twind,solar,wspeed,winddirec,riv_time,rivFlow),open(savepath,'wb'))
else:
bio_time,sno3,sdiat,sflag,scili,diat_alld,no3_alld,flag_alld,cili_alld,phyto_alld,intdiat,intphyto,spar,fracdiat,sphyto,percdiat,\
u_wind,v_wind,twind,solar,wspeed,winddirec,riv_time,rivFlow=pickle.load(open(savepath,'rb'))
fig,ax=plt.subplots(1,1,figsize=(12,3))
p1=ax.plot(bio_time,sphyto,
'-',color='forestgreen',label='Phytoplankton')
p2=ax.plot(bio_time,sno3,
'-',color='orange',label='Nitrate')
ax.legend(handles=[p1[0],p2[0]],loc=1)
ax.set_ylabel('Concentration ($\mu$M N)')
ax.set_title('Surface Phytoplankton and Nitrate at Station S3')
Text(0.5, 1.0, 'Surface Phytoplankton and Nitrate at Station S3')
a) Average phytoplankton concentration over upper 3 m
b) Average nitrate over upper 3 m
c) Find first location where nitrate crosses below 0.5 micromolar and stays there for 2 days
d) Find date with maximum phytoplankton concentration within four days (say 9 day window) of date in c).
Take first peak, check if it has two days around have concentrations>5, if no, move to next peak
# A dataframe that contains the variables needed to determine the timing of the spring bloom for metric 2 & 3
df = pd.DataFrame({'bio_time':bio_time, 'sphyto':sphyto, 'sno3':sno3})
df
bio_time | sphyto | sno3 | |
---|---|---|---|
0 | 2015-02-15 12:00:00 | 1.255212 | 17.625977 |
1 | 2015-02-16 12:00:00 | 1.320988 | 17.007074 |
2 | 2015-02-17 12:00:00 | 1.458417 | 15.910181 |
3 | 2015-02-18 12:00:00 | 1.579880 | 15.771015 |
4 | 2015-02-19 12:00:00 | 1.729280 | 16.093336 |
... | ... | ... | ... |
116 | 2015-06-11 12:00:00 | 3.340473 | 0.654803 |
117 | 2015-06-12 12:00:00 | 3.568457 | 0.687857 |
118 | 2015-06-13 12:00:00 | 4.321714 | 0.978154 |
119 | 2015-06-14 12:00:00 | 3.935845 | 1.039611 |
120 | 2015-06-15 12:00:00 | 4.036834 | 0.768613 |
121 rows × 3 columns
df.sphyto[(df.sphyto.shift(1) < df.sphyto) & (df.sphyto.shift(-1) < df.sphyto)]
4 1.729280 9 2.591403 17 6.799445 19 7.249289 25 4.688278 27 4.428394 32 3.556840 36 3.740952 41 5.728502 45 6.933609 52 2.370629 55 2.248283 59 4.077229 63 2.983586 67 2.449699 73 4.258922 77 6.421648 82 2.535459 85 1.446307 92 3.555163 101 3.418709 107 1.573672 111 2.291829 118 4.321714 Name: sphyto, dtype: float32
# to find all the peaks
df['phytopeaks'] = df.sphyto[(df.sphyto.shift(1) < df.sphyto) & (df.sphyto.shift(-1) < df.sphyto)]
df
bio_time | sphyto | sno3 | phytopeaks | |
---|---|---|---|---|
0 | 2015-02-15 12:00:00 | 1.255212 | 17.625977 | NaN |
1 | 2015-02-16 12:00:00 | 1.320988 | 17.007074 | NaN |
2 | 2015-02-17 12:00:00 | 1.458417 | 15.910181 | NaN |
3 | 2015-02-18 12:00:00 | 1.579880 | 15.771015 | NaN |
4 | 2015-02-19 12:00:00 | 1.729280 | 16.093336 | 1.729280 |
... | ... | ... | ... | ... |
116 | 2015-06-11 12:00:00 | 3.340473 | 0.654803 | NaN |
117 | 2015-06-12 12:00:00 | 3.568457 | 0.687857 | NaN |
118 | 2015-06-13 12:00:00 | 4.321714 | 0.978154 | 4.321714 |
119 | 2015-06-14 12:00:00 | 3.935845 | 1.039611 | NaN |
120 | 2015-06-15 12:00:00 | 4.036834 | 0.768613 | NaN |
121 rows × 4 columns
df.loc[df.bio_time>dt.datetime(2015,3,28)].head(12)
bio_time | sphyto | sno3 | phytopeaks | |
---|---|---|---|---|
41 | 2015-03-28 12:00:00 | 5.728502 | 5.056458 | 5.728502 |
42 | 2015-03-29 12:00:00 | 5.104787 | 4.266105 | NaN |
43 | 2015-03-30 12:00:00 | 5.648978 | 4.286055 | NaN |
44 | 2015-03-31 12:00:00 | 6.748889 | 4.585369 | NaN |
45 | 2015-04-01 12:00:00 | 6.933609 | 1.795769 | 6.933609 |
46 | 2015-04-02 12:00:00 | 5.489853 | 0.169281 | NaN |
47 | 2015-04-03 12:00:00 | 4.764010 | 0.043544 | NaN |
48 | 2015-04-04 12:00:00 | 4.081254 | 0.226728 | NaN |
49 | 2015-04-05 12:00:00 | 3.674427 | 0.649200 | NaN |
50 | 2015-04-06 12:00:00 | 3.063072 | 0.378349 | NaN |
51 | 2015-04-07 12:00:00 | 2.266260 | 0.095489 | NaN |
52 | 2015-04-08 12:00:00 | 2.370629 | 0.198871 | 2.370629 |
i=45
df['sphyto'].iloc[i-1]>5 and df['sphyto'].iloc[i-2]>5 and pd.notna(df['phytopeaks'].iloc[i])
True
# extract the bloom time date
for i, row in df.iterrows():
if df['sphyto'].iloc[i-1]>5 and df['sphyto'].iloc[i-2]>5 and pd.notna(df['phytopeaks'].iloc[i]):
bloomtime2=df.bio_time[i]
break
elif df['sphyto'].iloc[i+1]>5 and df['sphyto'].iloc[i+2]>5 and pd.notna(df['phytopeaks'].iloc[i]):
bloomtime2=df.bio_time[i]
break
# i dont undestand? this was working before???
bloomtime2
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
Timestamp('2015-03-04 12:00:00')
for i, row in df.iterrows():
try:
if df['sphyto'].iloc[i-1]>5 and df['sphyto'].iloc[i-2]>5 and pd.notna(df['phytopeaks'].iloc[i]):
bloomtime2=df.bio_time[i]
break
elif df['sphyto'].iloc[i+1]>5 and df['sphyto'].iloc[i+2]>5 and pd.notna(df['phytopeaks'].iloc[i]):
bloomtime2=df.bio_time[i]
break
except IndexError:
bloomtime2=np.nan
print('bloom not found')
df.tail()
bio_time | sphyto | sno3 | phytopeaks | |
---|---|---|---|---|
116 | 2015-06-11 12:00:00 | 3.340473 | 0.654803 | NaN |
117 | 2015-06-12 12:00:00 | 3.568457 | 0.687857 | NaN |
118 | 2015-06-13 12:00:00 | 4.321714 | 0.978154 | 4.321714 |
119 | 2015-06-14 12:00:00 | 3.935845 | 1.039611 | NaN |
120 | 2015-06-15 12:00:00 | 4.036834 | 0.768613 | NaN |
i=120
try:
df['sphyto'].iloc[i+1]
except IndexError:
print('not found')
not found
The median + 5% of the annual Chl concentration is deemed “threshold value” for each year.
# 1) determine threshold value
# a) find median chl value of that year
# b) add 5%
# c) secondthresh = find 70% of threshold value
threshold=df['sphyto'].median()*1.05
secondthresh=threshold*0.7
# 2) Take the average of each week and make a dataframe with start date of week and weekly average
weeklychl = pd.DataFrame(df.resample('W', on='bio_time').sphyto.mean())
weeklychl.reset_index(inplace=True)
# 3) Loop through the weeks and find the first week that reaches the threshold. Is one of the two week values after this week > secondthresh?
# yes? that is the bloomtime
# no? go to next week
for i, row in weeklychl.iterrows():
if weeklychl['sphyto'].iloc[i]>threshold and weeklychl['sphyto'].iloc[i+1]>secondthresh:
bloomtime3=df.bio_time[i]
elif weeklychl['sphyto'].iloc[i]>threshold and weeklychl['sphyto'].iloc[i+2]>secondthresh:
bloomtime3=df.bio_time[i]
break
bloomtime3
fig,ax=plt.subplots(1,1,figsize=(12,3))
ax.plot(bio_time,percdiat, '-',color='orchid')
ax.set_ylabel('Diatoms / Total Phytoplankton')
ax.set_title('Fraction of Diatoms in Total Surface Phytoplankton')
ax.set_ylim(0,1)
%%time
fig,ax=plt.subplots(1,1,figsize=(12,3))
ax.plot(bio_time,intphyto,'-',color='forestgreen',label='Phytoplankton')
ax.legend(loc=2);
ax.set_ylabel('Concentration (mmol N/m2)')
ax.set_xlim(bio_time[0],bio_time[-1])
ax.set_title('Depth Integrated Phytoplankton Concentration')
%%time
fig,ax=plt.subplots(1,1,figsize=(12,3))
ax.plot(bio_time,fracdiat,'-',color='orchid')
ax.set_ylabel('Diatoms / Total Phytoplankton')
ax.set_xlim(bio_time[0],bio_time[-1])
ax.set_title('Fraction of Diatoms in Total Depth Integrated Phytoplankton')
ax.set_ylim(0,1)
# plot phytoplankton on top:
fig,ax1=plt.subplots(1,1,figsize=(12,3))
p1=ax1.plot(bio_time,sphyto,
'-',color='forestgreen',label='Phytoplankton')
p2=ax1.plot(bio_time,sno3,
'-',color='orange',label='Nitrate')
ax1.set_ylabel('Concentration ($\mu$M N)')
ax1.set_ylim(0,18)
# Now plot Fraser Flow
ax2=ax1.twinx()
p3=ax2.plot(riv_time,rivFlow,'c-', label='Fraser Flow')
ax2.set_ylabel('Flow (m$^3$s$^{-1}$)')
ax2.set_title('Fraser Flow at Hope and Surface Phytoplankton at Station S3')
ax1.legend(handles=[p1[0],p2[0],p3[0]],loc='upper center')
fig,ax=plt.subplots(1,1,figsize=(18,2))
ax.plot(twind,u_wind,'c-')
ax.plot(twind,v_wind,'b-')
ax.set_xlim(start,end)
ax.set_title('Wind speed')
ax.set_ylabel('m/s')
fig,ax=plt.subplots(1,1,figsize=(20,6))
q=ax.quiver(twind, np.zeros(len(twind)), u_wind, v_wind,scale=200, width=0.001); # change the scale
ax.set_yticklabels([]);
fig.autofmt_xdate(bottom=0.3, rotation=30, ha='right')
yearsFmt = mdates.DateFormatter('%b %d')
ax.xaxis.set_major_formatter(yearsFmt)
ax.set_xlim(start,end)
ax.set_title('Wind Vectors in Geographic Coordinates')
# this can probably be done better?
# calculate daily average wind speed:
ttday=twind[12::24] # start at 12th value and take every 24th
wsdaily=list()
for ii in range(0,int(len(wspeed)/24)):
wsdaily.append(np.mean(wspeed[(ii*24):((ii+1)*24)]))
wsdaily=np.array(wsdaily) # convert to numpy array from list to be able to plot
fig,ax=plt.subplots(1,1,figsize=(18,2))
ax.plot(ttday,wsdaily,'b-')
ax.set_xlim(start,end)
ax.set_title('Daily average wind speed')
ax.set_ylabel('m/s')
wscubed=wsdaily**3
# plot phytoplankton on top:
fig,ax1=plt.subplots(1,1,figsize=(12,3))
p1=ax1.plot(bio_time,sphyto,
'-',color='forestgreen',label='Phytoplankton')
p2=ax1.plot(bio_time,sno3,
'-',color='orange',label='Nitrate')
ax1.set_ylabel('Concentration ($\mu$M N)')
ax1.set_ylim(0,18)
ax2=ax1.twinx()
p3=ax2.plot(ttday,wscubed,'b-',label='Wind Speed Cubed')
ax2.set_xlim(start,end)
ax2.set_title('Daily Average Wind Speed cubed and Surface Phytoplankton at Station S3')
ax2.set_ylabel('$\mathregular{m^3}$/$\mathregular{s^3}$')
ax1.legend(handles=[p1[0],p2[0],p3[0]],loc='upper center')
# plot phytoplankton on top:
fig,ax1=plt.subplots(1,1,figsize=(12,3))
p1=ax1.plot(bio_time,sphyto,
'-',color='forestgreen',label='Phytoplankton')
p2=ax1.plot(bio_time,sno3,
'-',color='orange',label='Nitrate')
ax1.set_ylabel('Concentration ($\mu$M N)')
ax1.set_ylim(0,18)
ax2=ax1.twinx()
p3=ax2.plot(bio_time,spar,
'-',color='red',label='Model PAR')
ax2.set_ylabel('PAR (W/$\mathregular{m^2}$)') # say its model PAR
ax2.set_title('Modeled PAR and Surface Phytoplankton at Station S3')
ax1.legend(handles=[p1[0],p2[0],p3[0]],loc='center left')
fig,ax=plt.subplots(1,1,figsize=(18,2))
ax.plot(twind,solar,'r-')
ax.set_xlim(start,end)
ax.set_title('Solar radiation')
ax.set_ylabel('W/$\mathregular{m^2}$')
# calculate daily average solar radiation:
ttday=twind[12::24] # start at 12th value and take every 24th
solardaily=list()
for ii in range(0,int(len(solar)/24)):
solardaily.append(np.mean(solar[(ii*24):((ii+1)*24)]))
solardaily=np.array(solardaily) # convert to numpy array from list to be able to plot
# plot phytoplankton on top:
fig,ax1=plt.subplots(1,1,figsize=(12,3))
p1=ax1.plot(bio_time,sphyto,
'-',color='forestgreen',label='Phytoplankton')
p2=ax1.plot(bio_time,sno3,
'-',color='orange',label='Nitrate')
ax1.set_ylabel('Concentration ($\mu$M N)')
ax1.set_ylim(0,18)
ax2=ax1.twinx()
p3=ax2.plot(ttday,solardaily,'m-',label='Solar Radiation')
ax2.set_xlim(start,end)
ax2.set_title('Daily Average Solar Radiation and Surface Phytoplankton at Station S3')
ax2.set_ylabel('W/$\mathregular{m^2}$')
ax1.legend(handles=[p1[0],p2[0],p3[0]],loc='upper center')