#!/usr/bin/env python # coding: utf-8 # ## Comparison of 201905 Model Phytoplankton to HPLC Phytoplankton Abundances from Nina Nemcek # In[1]: import numpy as np # this module handles arrays, but here we need it for its NaN value import pandas as pd # this module contains a lot of tools for handling tabular data from matplotlib import pyplot as plt from salishsea_tools import evaltools as et import datetime as dt import os import gsw import pickle import netCDF4 as nc import cmocean from scipy import stats as spst from pandas.plotting import register_matplotlib_converters register_matplotlib_converters() get_ipython().run_line_magic('matplotlib', 'inline') # Note: crypto+hapto+prasino grouping was actually determined based on comparisons to 201812 model run # ## Load data and matched model output # In[2]: modSourceDir= '/results2/SalishSea/nowcast-green.201905/' modver='201905' Chl_N=1.8 # Chl:N ratio startYMD=(2015,1,1) endYMD=(2018,12,31) # In[3]: start_date = dt.datetime(startYMD[0],startYMD[1],startYMD[2]) end_date = dt.datetime(endYMD[0],endYMD[1],endYMD[2]) #dt.datetime(2019,6,30) # In[4]: datestr='_'+start_date.strftime('%Y%m%d')+'_'+end_date.strftime('%Y%m%d') # In[5]: with nc.Dataset('/ocean/eolson/MEOPAR/NEMO-forcing/grid/mesh_mask201702_noLPE.nc') as mesh: tmask=np.copy(mesh.variables['tmask'][0,:,:,:]) navlat=np.copy(mesh.variables['nav_lat'][:,:]) navlon=np.copy(mesh.variables['nav_lon'][:,:]) # In[6]: def subval(idf,colList): # first value in colList should be the column you are going to keep # follow with other columns that will be used to fill in when that column is NaN # in order of precedence if len(colList)==2: idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \ else r[colList[1]] for i,r in idf.iterrows()] elif len(colList)==3: idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \ else r[colList[1]] if not pd.isna(r[colList[1]]) \ else r[colList[2]] for i,r in idf.iterrows()] else: raise NotImplementedError('Add to code to handle this case') idf.drop(columns=list(colList[1:]),inplace=True) return idf # In[7]: if os.path.isfile('matched_'+modver+datestr+'_NewALLO.pkl'): data=pickle.load(open( 'matched_'+modver+datestr+'_NewALLO.pkl', 'rb' ) ) print('matched_'+modver+datestr+'_NewALLO.pkl') else: # define paths to the source files and eventual output file flist=('/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2015_NewALLO.csv', '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2016_NewALLO.csv', '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2017_NewALLO.csv', '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2018_NewALLO.csv')#, #'/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2019.csv') dfs=list() for fname in flist: idf=pd.read_csv(fname) print(fname,sorted(idf.keys())) dfs.append(idf) df=pd.concat(dfs,ignore_index=True,sort=False); # concatenate the list into a single table df.drop(labels=['ADM:MISSION','ADM:PROJECT','ADM:SCIENTIST','Zone','Zone.1','Temperature:Draw', 'Temperature:Draw [deg C (ITS90)]','Bottle:Firing_Sequence','Comments by sample_numbeR', 'File Name','LOC:EVENT_NUMBER','Number_of_bin_records' ],axis=1,inplace=True) #df=subval(df,('Dictyochophytes','Dictyo')) df=subval(df,('Chlorophyll:Extracted [mg/m^3]','Chlorophyll:Extracted')) #df=subval(df,('Dinoflagellates','Dinoflagellates-1')) df=subval(df,('Fluorescence [mg/m^3]','Fluorescence:URU:Seapoint [mg/m^3]','Fluorescence:URU:Seapoint')) df=subval(df,('Lat','LOC:LATITUDE')) df=subval(df,('Lon','LOC:LONGITUDE')) df=subval(df,('Nitrate_plus_Nitrite [umol/L]','Nitrate_plus_Nitrite')) df=subval(df,('PAR [uE/m^2/sec]','PAR')) df=subval(df,('Phaeo-Pigment:Extracted [mg/m^3]','Phaeo-Pigment:Extracted')) df=subval(df,('Phosphate [umol/L]','Phosphate')) df=subval(df,('Pressure [decibar]','Pressure')) #df=subval(df,('Raphidophytes','Raphido')) df=subval(df,('Salinity','Salinity [PSS-78]','Salinity:T1:C1 [PSS-78]')) df=subval(df,('Salinity:Bottle','Salinity:Bottle [PSS-78]')) df=subval(df,('Silicate [umol/L]','Silicate')) #df=subval(df,('TchlA (ug/L)','TchlA')) df=subval(df,('Temperature','Temperature [deg C (ITS90)]','Temperature:Secondary [deg C (ITS90)]')) df=subval(df,('Transmissivity [*/metre]','Transmissivity')) df['Z']=np.where(pd.isna(df['Depth [metres]']), -1*gsw.z_from_p(df['Pressure [decibar]'].values,df['Lat'].values), df['Depth [metres]']) df['p']=np.where(pd.isna(df['Pressure [decibar]']), gsw.p_from_z(-1*df['Depth [metres]'].values,df['Lat'].values), df['Pressure [decibar]']) df['SA']=gsw.SA_from_SP(df['Salinity'].values,df['p'].values,df['Lon'].values,df['Lat'].values) df['CT']=gsw.CT_from_t(df['SA'].values,df['Temperature'].values,df['p'].values) df.rename({'TchlA':'TchlA (ug/L)','Raphido':'Raphidophytes','Dinoflagellates-1':'Dinoflagellates', 'Dictyo':'Dictyochophytes'},axis=1, inplace=True, errors='raise') df['dtUTC']=[dt.datetime.strptime(ii,'%Y-%m-%d %H:%M:%S') if isinstance(ii,str) else np.nan for ii in df['FIL:START TIME YYYY/MM/DD HH:MM:SS'] ] PATH= modSourceDir flen=1 namfmt='nowcast' #varmap={'N':'nitrate','Si':'silicon','Ammonium':'ammonium'} filemap={'nitrate':'ptrc_T','silicon':'ptrc_T','ammonium':'ptrc_T','diatoms':'ptrc_T','ciliates':'ptrc_T','flagellates':'ptrc_T','vosaline':'grid_T','votemper':'grid_T'} #gridmap={'nitrate':'tmask','silicon':'tmask','ammonium':'tmask'} fdict={'ptrc_T':1,'grid_T':1} data=et.matchData(df,filemap,fdict,start_date,end_date,namfmt,PATH,flen) with open('matched_'+modver+datestr+'_NewALLO.pkl','wb') as f: pickle.dump(data,f) # In[8]: data.keys() # In[9]: data['LOC:STATION'].unique() # In[10]: data['other']=0.0 for el in ('Cryptophytes', 'Cyanobacteria', 'Dictyochophytes', 'Dinoflagellates', 'Haptophytes', 'Prasinophytes', 'Raphidophytes'): data['other']=data['other']+data[el] # In[11]: def yd(idt): if type(idt)==dt.datetime: yd=(idt-dt.datetime(idt.year-1,12,31)).days else: # assume array or pandas yd=[(ii-dt.datetime(ii.year-1,12,31)).days for ii in idt] return yd data['yd']=yd(data['dtUTC']) data['Year']=[ii.year for ii in data['dtUTC']] # In[12]: # define log transform function with slight shift to accommodate zero values def logt(x): return np.log10(x+.001) # ## Determine which HPLC groups have the highest biomass # In[13]: data['Diatoms-1'].mean() ## Highest biomass # In[14]: data['Diatoms-2'].mean() ## include # In[15]: data['Cyanobacteria'].mean() ## exclude due to low biomass # In[16]: data['Cryptophytes'].mean() ## include # In[17]: data['Prasinophytes'].mean() ## include # In[18]: data['Haptophytes'].mean() ## include # In[19]: data['Dictyochophytes'].mean() ## exclude due to low biomass # In[20]: data['Dinoflagellates'].mean() # exclude due to low biomass # In[21]: data['Raphidophytes'].mean() ## Include # In[22]: data['Month']=[ii.month for ii in data['dtUTC']] # In[23]: monthlymean=data.groupby(['Month']).mean() # In[24]: monthlymean['Diatoms-1'] # In[25]: monthlymean['HPLCDiatoms']=(monthlymean['Diatoms-1']+monthlymean['Raphidophytes']+monthlymean['Diatoms-2']) # In[26]: monthlymean['HPLCFlag']=(monthlymean['Cryptophytes']+monthlymean['Haptophytes']+monthlymean['Raphidophytes']) # In[27]: monthlymean['HPLCDiatoms'] # In[28]: monthlysem=logt(data.groupby(['Month']).sem()) # In[29]: monthlymean['L10mod_diatoms']=logt(monthlymean['mod_diatoms']*Chl_N) monthlymean['L10mod_flagellates']=logt(monthlymean['mod_flagellates']*Chl_N) monthlymean['L10Diatoms-1']=logt(monthlymean['Diatoms-1']) monthlymean['L10Diatoms-2']=logt(monthlymean['Diatoms-2']) monthlymean['L10Cryptophytes']=logt(monthlymean['Cryptophytes']) monthlymean['L10Prasinophytes']=logt(monthlymean['Prasinophytes']) monthlymean['L10Haptophytes']=logt(monthlymean['Haptophytes']) monthlymean['L10Raphidophytes']=logt(monthlymean['Raphidophytes']) monthlymean['L10TotalChla']=logt(monthlymean['TchlA (ug/L)']) monthlymean['L10HPLCDiatoms']=logt(monthlymean['HPLCDiatoms']) monthlymean['L10HPLCFlag']=logt(monthlymean['HPLCFlag']) # In[30]: monthlymean['L10mod_diatoms'] # In[31]: # define inverse log transform with same shift def logt_inv(y): return 10**y-.001 # ## Model vs Obs Plots for various model-obs groups # ### Correlation Coefficient Matrix # In[32]: data['mod_diatoms_chl']=Chl_N*data['mod_diatoms'] data['mod_flagellates_chl']=Chl_N*data['mod_flagellates'] data['mod_ciliates_chl']=Chl_N*data['mod_ciliates'] data['mod_TChl']=data['mod_diatoms_chl']+data['mod_flagellates_chl']+data['mod_ciliates_chl'] data['CPH']=data['Cryptophytes']+data['Prasinophytes']+data['Haptophytes'] data['DD']=data['Diatoms-1']+data['Diatoms-2'] dfVars=data.loc[:,['Diatoms-1', 'Diatoms-2','Cyanobacteria','Cryptophytes', 'Prasinophytes', 'Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes','DD','CPH','TchlA (ug/L)', 'mod_diatoms_chl','mod_flagellates_chl','mod_ciliates_chl','mod_TChl']] # In[33]: dfVars.corr() # ##### Strongest correlations: # Model diatoms and: # - Total chla: 0.343112 # - Diatoms-1: 0.329545 # - Diatoms-1+Diatoms-2: 0.329332 # # Model flagellates and: # - crypto+hapto+prasino: 0.377130 # - haptophytes: 0.309359 # - prasinophytes: 0.288055 # - cryptophytes: 0.283430 # - cyanobacteria: 0.271249 (but remember that cyanobacteria abundances are low) # ### Variance-Covariance Matrix # In[34]: dfVars.cov() # ##### largest covariances: # Model diatoms and: # - TChlA: 4.185330 # - Diatoms-1+Diatoms-2: 2.953355 # - Diatoms-1: 2.849766 # - Raphidophytes: 1.242441 # # Model flagellates and: # - crypto+hapto+prasino: 0.391340 # - haptophytes: 0.150141 # - cryptophytes: 0.144700 # ### Corr Coeff matrix with log transformed values: # In[35]: dflog=pd.DataFrame() for el in ['Diatoms-1', 'Diatoms-2','Cyanobacteria','Cryptophytes', 'Prasinophytes', 'Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes','CPH','TchlA (ug/L)', 'mod_diatoms_chl','mod_flagellates_chl','mod_ciliates_chl','mod_TChl']: dflog[el]=logt(data[el]) dflog.corr() # ### Cov matrix with log transformed values: # In[36]: dflog.cov() # ### Individual phytoplankton groups compared to model groups (1:1 correspondence not expected) # In[37]: fig,ax=plt.subplots(1,4,figsize=(20,4)) fig.subplots_adjust(wspace=.3) #ax[0].plot(logt(data['Diatoms-1']+data['Raphidophytes']+.5*data['Cryptophytes']),logt(data['mod_diatoms']),'r.') ax[0].plot((data['Diatoms-1']),(data['mod_diatoms']),'g.',ms=5) ax[0].set_ylabel('Model Diatoms \n log10 Chl(mg m$^{-3}$)+.001',fontsize=14) ax[0].set_xlabel('Diatoms-1 \n log10 Chl(mg m$^{-3}$)+.001',fontsize=14) ax[0].set_title('') ax[0].plot((10**-3,10**1.5),(10**-3,10**1.5),'k-',alpha=.2) ax[0].set_xlim((10**-3,10**1.5)) ax[0].set_ylim((10**-3,10**1.5)) ax[0].set_aspect(1) ax[0].set_xscale('log') ax[0].set_yscale('log') ax[0].xaxis.set_tick_params(labelsize=12) ax[0].yaxis.set_tick_params(labelsize=12) ax[0].text(10**-2.8,10**1, 'r = 0.33', fontsize=12, color='k') ax[1].plot((data['Diatoms-2']),(data['mod_diatoms']),'g.',ms=5) ax[1].set_ylabel('') ax[1].set_xlabel('Diatoms-2 \n log10 Chl(mg m$^{-3}$)+.001',fontsize=14) ax[1].plot((10**-3,10**1.5),(10**-3,10**1.5),'k-',alpha=.2) ax[1].set_xlim((10**-3,10**1.5)) ax[1].set_ylim((10**-3,10**1.5)) ax[1].set_aspect(1) ax[1].set_xscale('log') ax[1].set_yscale('log') ax[1].xaxis.set_tick_params(labelsize=12) ax[1].yaxis.set_tick_params(labelsize=12) ax[1].text(10**-2.8,10**1, 'r = 0.07', fontsize=12, color='k') ax[2].plot((data['Raphidophytes']),(data['mod_diatoms']),'g.',ms=5) ax[2].set_ylabel('') ax[2].set_xlabel('Raphidophytes \n log10 Chl(mg m$^{-3}$)+.001',fontsize=14) ax[2].set_title('') ax[2].plot((10**-3,10**1.5),(10**-3,10**1.5),'k-',alpha=.2) ax[2].set_xlim((10**-3,10**1.5)) ax[2].set_ylim((10**-3,10**1.5)) ax[2].set_aspect(1) ax[2].set_xscale('log') ax[2].set_yscale('log') ax[2].xaxis.set_tick_params(labelsize=12) ax[2].yaxis.set_tick_params(labelsize=12) ax[2].text(10**-2.8,10**1, 'r = 0.20', fontsize=12, color='k') ax[3].plot((data['Diatoms-1']+data['Diatoms-2']),(data['mod_diatoms']),'g.',ms=5) ax[3].set_ylabel('') ax[3].set_xlabel('Diatoms-1 + Diatoms-2 \n log10 Chl(mg m$^{-3}$)+.001',fontsize=14) ax[3].set_title('') ax[3].plot((10**-3,10**1.5),(10**-3,10**1.5),'k-',alpha=.2) ax[3].set_xlim((10**-3,10**1.5)) ax[3].set_ylim((10**-3,10**1.5)) ax[3].set_aspect(1) ax[3].set_xscale('log') ax[3].set_yscale('log') ax[3].xaxis.set_tick_params(labelsize=12) ax[3].yaxis.set_tick_params(labelsize=12) ax[3].text(10**-2.8,10**1, 'r = 0.33', fontsize=12, color='k') # In[38]: fig,ax=plt.subplots(1,4,figsize=(20,4)) fig.subplots_adjust(wspace=.3) #ax[0].plot(logt(data['Diatoms-1']+data['Raphidophytes']+.5*data['Cryptophytes']),logt(data['mod_diatoms']),'r.') ax[0].plot((data['Cryptophytes']),(data['mod_flagellates']),'m.',ms=5) ax[0].set_ylabel('Model Nanoflagellates \n log10 Chl(mg m$^{-3}$)+.001',fontsize=14) ax[0].set_xlabel('Cryptophytes \n log10 Chl(mg m$^{-3}$)+.001',fontsize=14) ax[0].set_title('') ax[0].plot((10**-3,10**1.5),(10**-3,10**1.5),'k-',alpha=.2) ax[0].set_xlim((10**-3,10**1.5)) ax[0].set_ylim((10**-3,10**1.5)) ax[0].set_aspect(1) ax[0].set_xscale('log') ax[0].set_yscale('log') ax[0].xaxis.set_tick_params(labelsize=12) ax[0].yaxis.set_tick_params(labelsize=12) ax[0].text(10**-2.8,10**1, 'r = 0.28', fontsize=12, color='k') ax[1].plot((data['Prasinophytes']),(data['mod_flagellates']),'m.',ms=5) ax[1].set_ylabel('') ax[1].set_xlabel('Prasinophytes \n log10 Chl(mg m$^{-3}$)+.001',fontsize=14) ax[1].plot((10**-3,10**1.5),(10**-3,10**1.5),'k-',alpha=.2) ax[1].set_xlim((10**-3,10**1.5)) ax[1].set_ylim((10**-3,10**1.5)) ax[1].set_aspect(1) ax[1].set_xscale('log') ax[1].set_yscale('log') ax[1].xaxis.set_tick_params(labelsize=12) ax[1].yaxis.set_tick_params(labelsize=12) ax[1].text(10**-2.8,10**1, 'r = 0.29', fontsize=12, color='k') ax[2].plot((data['Haptophytes']),(data['mod_flagellates']),'m.',ms=5) ax[2].set_ylabel('') ax[2].set_xlabel('Haptophytes \n log10 Chl(mg m$^{-3}$)+.001',fontsize=14) ax[2].set_title('') ax[2].plot((10**-3,10**1.5),(10**-3,10**1.5),'k-',alpha=.2) ax[2].set_xlim((10**-3,10**1.5)) ax[2].set_ylim((10**-3,10**1.5)) ax[2].set_aspect(1) ax[2].set_xscale('log') ax[2].set_yscale('log') ax[2].xaxis.set_tick_params(labelsize=12) ax[2].yaxis.set_tick_params(labelsize=12) ax[2].text(10**-2.8,10**1, 'r = 0.31', fontsize=12, color='k') ax[3].plot((data['Cryptophytes']+data['Prasinophytes']+data['Haptophytes']),(data['mod_flagellates']),'m.',ms=5) ax[3].set_ylabel('') ax[3].set_xlabel('Crypto + Prasino + Hapto \n log10 Chl(mg m$^{-3}$)+.001',fontsize=14) ax[3].set_title('') ax[3].plot((10**-3,10**1.5),(10**-3,10**1.5),'k-',alpha=.2) ax[3].set_xlim((10**-3,10**1.5)) ax[3].set_ylim((10**-3,10**1.5)) ax[3].set_aspect(1) ax[3].set_xscale('log') ax[3].set_yscale('log') ax[3].xaxis.set_tick_params(labelsize=12) ax[3].yaxis.set_tick_params(labelsize=12) ax[3].text(10**-2.8,10**1, 'r = 0.38', fontsize=12, color='k') # In[ ]: