#!/usr/bin/env python # coding: utf-8 # ## Comparison of 201905 Model Phytoplankton to HPLC Phytoplankton Abundances from Nina Nemcek # In[1]: import numpy as np # this module handles arrays, but here we need it for its NaN value import pandas as pd # this module contains a lot of tools for handling tabular data from matplotlib import pyplot as plt from salishsea_tools import evaltools as et import datetime as dt import os import gsw import pickle import netCDF4 as nc import cmocean from scipy import stats as spst from pandas.plotting import register_matplotlib_converters register_matplotlib_converters() get_ipython().run_line_magic('matplotlib', 'inline') # ## Load data and matched model output # In[2]: modSourceDir= '/results2/SalishSea/nowcast-green.201905/' modver='201905' Chl_N=1.8 # Chl:N ratio startYMD=(2015,1,1) endYMD=(2018,12,31) # In[3]: start_date = dt.datetime(startYMD[0],startYMD[1],startYMD[2]) end_date = dt.datetime(endYMD[0],endYMD[1],endYMD[2]) #dt.datetime(2019,6,30) # In[4]: datestr='_'+start_date.strftime('%Y%m%d')+'_'+end_date.strftime('%Y%m%d') # In[5]: with nc.Dataset('/ocean/eolson/MEOPAR/NEMO-forcing/grid/mesh_mask201702_noLPE.nc') as mesh: tmask=np.copy(mesh.variables['tmask'][0,:,:,:]) navlat=np.copy(mesh.variables['nav_lat'][:,:]) navlon=np.copy(mesh.variables['nav_lon'][:,:]) # In[6]: def subval(idf,colList): # first value in colList should be the column you are going to keep # follow with other columns that will be used to fill in when that column is NaN # in order of precedence if len(colList)==2: idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \ else r[colList[1]] for i,r in idf.iterrows()] elif len(colList)==3: idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \ else r[colList[1]] if not pd.isna(r[colList[1]]) \ else r[colList[2]] for i,r in idf.iterrows()] else: raise NotImplementedError('Add to code to handle this case') idf.drop(columns=list(colList[1:]),inplace=True) return idf # In[7]: if os.path.isfile('matched_'+modver+datestr+'_NewALLO.pkl'): data=pickle.load(open( 'matched_'+modver+datestr+'_NewALLO.pkl', 'rb' ) ) else: # define paths to the source files and eventual output file flist=('/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2015_NewALLO.csv', '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2016_NewALLO.csv', '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2017_NewALLO.csv', '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2018_NewALLO.csv')#, #'/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2019.csv') dfs=list() for fname in flist: idf=pd.read_csv(fname) print(fname,sorted(idf.keys())) dfs.append(idf) df=pd.concat(dfs,ignore_index=True,sort=False); # concatenate the list into a single table df.drop(labels=['ADM:MISSION','ADM:PROJECT','ADM:SCIENTIST','Zone','Zone.1','Temperature:Draw', 'Temperature:Draw [deg C (ITS90)]','Bottle:Firing_Sequence','Comments by sample_numbeR', 'File Name','LOC:EVENT_NUMBER','Number_of_bin_records' ],axis=1,inplace=True) #df=subval(df,('Dictyochophytes','Dictyo')) df=subval(df,('Chlorophyll:Extracted [mg/m^3]','Chlorophyll:Extracted')) #df=subval(df,('Dinoflagellates','Dinoflagellates-1')) df=subval(df,('Fluorescence [mg/m^3]','Fluorescence:URU:Seapoint [mg/m^3]','Fluorescence:URU:Seapoint')) df=subval(df,('Lat','LOC:LATITUDE')) df=subval(df,('Lon','LOC:LONGITUDE')) df=subval(df,('Nitrate_plus_Nitrite [umol/L]','Nitrate_plus_Nitrite')) df=subval(df,('PAR [uE/m^2/sec]','PAR')) df=subval(df,('Phaeo-Pigment:Extracted [mg/m^3]','Phaeo-Pigment:Extracted')) df=subval(df,('Phosphate [umol/L]','Phosphate')) df=subval(df,('Pressure [decibar]','Pressure')) #df=subval(df,('Raphidophytes','Raphido')) df=subval(df,('Salinity','Salinity [PSS-78]','Salinity:T1:C1 [PSS-78]')) df=subval(df,('Salinity:Bottle','Salinity:Bottle [PSS-78]')) df=subval(df,('Silicate [umol/L]','Silicate')) #df=subval(df,('TchlA (ug/L)','TchlA')) df=subval(df,('Temperature','Temperature [deg C (ITS90)]','Temperature:Secondary [deg C (ITS90)]')) df=subval(df,('Transmissivity [*/metre]','Transmissivity')) df['Z']=np.where(pd.isna(df['Depth [metres]']), -1*gsw.z_from_p(df['Pressure [decibar]'].values,df['Lat'].values), df['Depth [metres]']) df['p']=np.where(pd.isna(df['Pressure [decibar]']), gsw.p_from_z(-1*df['Depth [metres]'].values,df['Lat'].values), df['Pressure [decibar]']) df['SA']=gsw.SA_from_SP(df['Salinity'].values,df['p'].values,df['Lon'].values,df['Lat'].values) df['CT']=gsw.CT_from_t(df['SA'].values,df['Temperature'].values,df['p'].values) df.rename({'TchlA':'TchlA (ug/L)','Raphido':'Raphidophytes','Dinoflagellates-1':'Dinoflagellates', 'Dictyo':'Dictyochophytes'},axis=1, inplace=True, errors='raise') df['dtUTC']=[dt.datetime.strptime(ii,'%Y-%m-%d %H:%M:%S') if isinstance(ii,str) else np.nan for ii in df['FIL:START TIME YYYY/MM/DD HH:MM:SS'] ] PATH= modSourceDir flen=1 namfmt='nowcast' #varmap={'N':'nitrate','Si':'silicon','Ammonium':'ammonium'} filemap={'nitrate':'ptrc_T','silicon':'ptrc_T','ammonium':'ptrc_T','diatoms':'ptrc_T','ciliates':'ptrc_T','flagellates':'ptrc_T','vosaline':'grid_T','votemper':'grid_T'} #gridmap={'nitrate':'tmask','silicon':'tmask','ammonium':'tmask'} fdict={'ptrc_T':1,'grid_T':1} data=et.matchData(df,filemap,fdict,start_date,end_date,namfmt,PATH,flen) with open('matched_'+modver+datestr+'_NewALLO.pkl','wb') as f: pickle.dump(data,f) # In[8]: data['other']=0.0 for el in ('Cryptophytes', 'Cyanobacteria', 'Dictyochophytes', 'Dinoflagellates', 'Haptophytes', 'Prasinophytes', 'Raphidophytes'): data['other']=data['other']+data[el] # In[9]: def yd(idt): if type(idt)==dt.datetime: yd=(idt-dt.datetime(idt.year-1,12,31)).days else: # assume array or pandas yd=[(ii-dt.datetime(ii.year-1,12,31)).days for ii in idt] return yd data['yd']=yd(data['dtUTC']) data['Year']=[ii.year for ii in data['dtUTC']] # In[10]: # define log transform function with slight shift to accommodate zero values def logt(x): return np.log10(x+.001) # ### What do HPLC-based observations look like in terms of species composition? # In[11]: hplclist=('Diatoms-1', 'Diatoms-2','Cyanobacteria','Cryptophytes', 'Prasinophytes', 'Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes') # In[12]: ### Histogram by Year: for year in range(2015,2019): fig,ax=plt.subplots(1,1,figsize=(12,2.5)) hplclist=('Diatoms-1', 'Diatoms-2','Cyanobacteria','Cryptophytes', 'Prasinophytes', 'Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes') x=np.array([logt(data[data.Year==year][el]) for el in hplclist]).T cols=('darkblue','cornflowerblue','cyan','olive','limegreen','magenta','orange','firebrick','plum') bins=np.linspace(-3,2,10) ax.hist(x, bins, density=False, histtype='bar', color=cols, label=hplclist); fig.legend(prop={'size': 12},loc='center right',bbox_to_anchor=[1.1,.5,0,0]); ax.set_xlim(-3,2) ax.set_xlabel('log10(Chl[ug/L]+.001)') ax.set_ylabel('Count') ax.set_title(year) # - The most abundant groups are diatoms-1 # - Medium abundance groups includ cryptophytes, prasinophytes, haptophytes, and raphidophytes, and dinoflagellates # - Low abundance groups include cyanobacteria and dictyochophytes # - The dinoflagellate contribution was higher in 2016-2018 compared to 2015. # In[13]: x=np.array([logt(data[el]) for el in hplclist]).T fig,ax=plt.subplots(1,1,figsize=(15,5)) for i in range(0,len(hplclist)): ax.plot(data['dtUTC'],x[:,i],'o',color=cols[i],label=hplclist[i],ms=4) fig.legend(loc='center right',bbox_to_anchor=[1.05,.5]) ax.set_ylim(-3,2) ax.set_xlabel('Date') ax.set_ylabel('log10(Chl [ug/l]+.001)') for i in range(2015,2020): ax.axvspan(dt.datetime(i-1,10,15),dt.datetime(i,3,1), facecolor='lightgrey', alpha=0.5) # - Diatoms-1 peak during the spring bloom, but are high most of thte time. # - Diatoms-2 are lower abundance than diatoms-1 and show a less consistent seasonal pattern; they bloomed in spring 2015 and fall 2018 # - Cryptophytes make up a consistent medium-level contribution # - Raphidophytes bloomed in summer 2018 in many samples # - Dictyochophytes exhibit occasional isolated high values but are often low abundance # - Dinoflagellates also exhibit occasional bloom values and generally low-to-medium abundance # ## Model vs Obs Plots for various model-obs groups # In[61]: fig,ax=plt.subplots(2,2,figsize=(10,9)) fig.subplots_adjust(hspace=.5) ax=ax.flatten() ax[0].plot(data['Diatoms-1']+data['Diatoms-2'],Chl_N*data['mod_diatoms'],'k.') ax[0].set_title('Diatoms (mg Chl/m$^3$)') ax[0].set_xlabel('HPLC') ax[0].set_ylabel('model') ax[0].plot((0,18),(0,18),'r-',alpha=.3) ax[1].plot(data['other'],Chl_N*(data['mod_flagellates']+data['mod_ciliates']),'k.') ax[1].set_title('non-Diatoms (mg Chl/m$^3$)') ax[1].set_xlabel('HPLC') ax[1].set_ylabel('model') ax[1].plot((0,12),(0,12),'r-',alpha=.3) ax[2].plot(data['Diatoms-1']+data['Diatoms-2'],Chl_N*data['mod_diatoms'],'k.') ax[2].set_title('Diatoms (mg Chl/m$^3$)') ax[2].set_xlabel('HPLC') ax[2].set_ylabel('model') ax[2].plot((0,25),(0,25),'r-',alpha=.3) ax[2].set_xlim((0,25)) ax[2].set_ylim((0,25)) ax[3].plot(data['other'],Chl_N*(data['mod_flagellates']+data['mod_ciliates']),'k.') ax[3].set_title('non-Diatoms (mg Chl/m$^3$)') ax[3].set_xlabel('HPLC') ax[3].set_ylabel('model') ax[3].plot((0,12),(0,12),'r-',alpha=.3) ax[3].set_xlim((0,8)) ax[3].set_ylim((0,8)) # In[15]: # log-log model vs obs fig,ax=plt.subplots(1,2,figsize=(12,5)) ax[0].plot(logt(data['Diatoms-1']+data['Diatoms-2']),logt(Chl_N*data['mod_diatoms']),'k.') ax[0].set_title('log10[ Diatoms (mg Chl/m$^3$) + 0.001]') ax[0].set_xlabel('HPLC') ax[0].set_ylabel('model') ax[0].set_xlim(-3.1,2) ax[0].set_ylim(-3.1,2) ax[0].plot((-6,3),(-6,3),'r-',alpha=.3) ax[1].plot(logt(data['other']),logt(Chl_N*(data['mod_flagellates']+data['mod_ciliates'])),'k.') ax[1].set_title('log10[ non-Diatoms (mg Chl/m$^3$) + 0.001]') ax[1].set_xlabel('HPLC') ax[1].set_ylabel('model') ax[1].plot((-6,3),(-6,3),'r-',alpha=.3) ax[1].set_xlim(-3.1,2) ax[1].set_ylim(-3.1,2) # - HPLC agreement for 201905 diatoms is not as strong as for 201805, but non-diatom agreement might be slightly better. # - There is a separation between low and high model values for non-diatoms that is not present in 201812 comparisons # In[16]: # total chlorophyll comparisons fig,ax=plt.subplots(1,2,figsize=(12,5)) ax[0].plot(data['TchlA (ug/L)'],Chl_N*(data['mod_flagellates']+data['mod_ciliates']+data['mod_diatoms']),'k.') ax[0].set_title('Total Chlorophyll (ug/L)') ax[0].set_xlabel('HPLC') ax[0].set_ylabel('model') ax[0].plot((0,20),(0,20),'r-',alpha=.3) ax[1].plot(logt(data['TchlA (ug/L)']),logt(Chl_N*(data['mod_flagellates']+data['mod_ciliates']+data['mod_diatoms'])),'k.') ax[1].set_title('log10[ Total Chlorophyll (ug/L) + 0.001]') ax[1].set_xlabel('HPLC') ax[1].set_ylabel('model') ax[1].plot((-6,5),(-6,5),'r-',alpha=.3) ax[1].set_xlim(-1,2) ax[1].set_ylim(-1,2); # ### By time of year # In[17]: fig,ax=plt.subplots(1,1,figsize=(5,5)) m=ax.scatter(logt(data['TchlA (ug/L)']),logt(Chl_N*(data['mod_flagellates']+data['mod_ciliates']+data['mod_diatoms'])), c=data['yd'],s=5,cmap=cmocean.cm.phase) ax.set_title('log10[ Total Chlorophyll (ug/L) + 0.001] By Year Day') ax.set_xlabel('HPLC') ax.set_ylabel('model') ax.plot((-6,5),(-6,5),'r-',alpha=.3) ax.set_xlim(-3,2) ax.set_ylim(-3,2); fig.colorbar(m) # - Winter chlorophyll agrees with HPLC # - Spring bloom chlorophyll can be too low or too high; this likely reflects poor spring bloom timing # - Summer chlorophyll tends high # - fall chlorophyll tends low # #### look specifically at points where HPLC level is high and model level is low # In[18]: x=logt(data['TchlA (ug/L)']) y=logt(Chl_N*(data['mod_flagellates']+data['mod_ciliates']+data['mod_diatoms'])) ii=data.loc[(y<-.5)&(x>0)].index fig,ax=plt.subplots(1,1,figsize=(5,5)) m=ax.scatter(logt(data.loc[ii,['TchlA (ug/L)']].values), logt(Chl_N*(data.loc[ii,['mod_flagellates']].values+data.loc[ii,['mod_ciliates']].values+\ data.loc[ii,['mod_diatoms']].values)), c=data.loc[ii,['yd']].values,cmap=cmocean.cm.phase) ax.set_title('log10[ Total Chlorophyll (ug/L) + 0.001] By Year Day') ax.set_xlabel('HPLC') ax.set_ylabel('model') ax.plot((-6,5),(-6,5),'r-',alpha=.3) #ax.set_xlim(-3,2) #ax.set_ylim(-3,2); fig.colorbar(m) # In[19]: temp=data.loc[(y<-.5)&(data['Diatoms-1']>=0),['dtUTC','Sample_Number','i','j','k','p','FIL:START TIME YYYY/MM/DD HH:MM:SS','TchlA (ug/L)','Diatoms-1','mod_diatoms','mod_ciliates','mod_flagellates']].copy(deep=True) # In[20]: temp['x']=logt(temp['TchlA (ug/L)']) temp['y']=logt(Chl_N*(temp['mod_flagellates']+temp['mod_ciliates']+temp['mod_diatoms'])) temp # ##### Where are these points in the model domain? # - boundaries (expected) and mixing regions (unexpected) # In[21]: ilen=len(temp) fig,ax=plt.subplots(1,ilen,figsize=(12,4)) indlist=temp.index for i in range(0,ilen): ax[i].pcolormesh(tmask[0,:,:]) ax[i].plot(temp.loc[indlist[i],['i']],temp.loc[indlist[i],['j']],'r*') fig.suptitle('very low model phytoplankton match locations'); # ## Multiple Linear Regression: # # #### HPLC groups and model diatoms # In[22]: ii=(~pd.isnull(data['mod_diatoms']))&(~pd.isnull(data['Diatoms-1']))&(~pd.isnull(data['Diatoms-2']))&(~pd.isnull(data['Cyanobacteria']))&\ (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Prasinophytes']))&(~pd.isnull(data['Haptophytes']))&\ (~pd.isnull(data['Dictyochophytes']))&(~pd.isnull(data['Dinoflagellates']))&(~pd.isnull(data['Raphidophytes'])) A=np.vstack([data.loc[ii]['Diatoms-1'],data.loc[ii]['Diatoms-2'],data.loc[ii]['Cyanobacteria'],data.loc[ii]['Cryptophytes'], data.loc[ii]['Prasinophytes'],data.loc[ii]['Haptophytes'],data.loc[ii]['Dictyochophytes'],data.loc[ii]['Dinoflagellates'], data.loc[ii]['Raphidophytes'],np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T b=data.loc[ii]['mod_diatoms'] m=np.linalg.lstsq(A,b,rcond=None)[0] # In[23]: m # In[24]: clist=('Diatoms-1','Diatoms-2','Cyanobacteria','Cryptophytes','Prasinophytes','Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes','ones') for c, mm in zip(clist,m): print(c,mm) # #### try again with a subset that showed some relationship (and diatoms-2): # In[25]: ii=(~pd.isnull(data['mod_diatoms']))&(~pd.isnull(data['Diatoms-1']))&(~pd.isnull(data['Diatoms-2']))&\ (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Raphidophytes'])) A=np.vstack([data.loc[ii]['Diatoms-1'],data.loc[ii]['Diatoms-2'],data.loc[ii]['Cryptophytes'], data.loc[ii]['Raphidophytes'],np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T b=data.loc[ii]['mod_diatoms'] m=np.linalg.lstsq(A,b,rcond=None)[0] # In[26]: clist=('Diatoms-1','Diatoms-2','Cryptophytes','Raphidophytes','ones') for c, mm in zip(clist,m): print(c,mm) # In[27]: ii=(~pd.isnull(data['mod_diatoms']))&(~pd.isnull(data['Diatoms-1']))&\ (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Raphidophytes'])) A=np.vstack([data.loc[ii]['Diatoms-1'],data.loc[ii]['Cryptophytes'], data.loc[ii]['Raphidophytes'],np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T b=data.loc[ii]['mod_diatoms'] m=np.linalg.lstsq(A,b,rcond=None)[0] # In[28]: clist=('Diatoms-1','Cryptophytes','Raphidophytes','ones') for c, mm in zip(clist,m): print(c,mm) # #### HPLC groups and model flagellates # In[29]: ii=(~pd.isnull(data['mod_flagellates']))&(~pd.isnull(data['Diatoms-1']))&(~pd.isnull(data['Diatoms-2']))&(~pd.isnull(data['Cyanobacteria']))&\ (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Prasinophytes']))&(~pd.isnull(data['Haptophytes']))&\ (~pd.isnull(data['Dictyochophytes']))&(~pd.isnull(data['Dinoflagellates']))&(~pd.isnull(data['Raphidophytes'])) A=np.vstack([data.loc[ii]['Diatoms-1'],data.loc[ii]['Diatoms-2'],data.loc[ii]['Cyanobacteria'],data.loc[ii]['Cryptophytes'], data.loc[ii]['Prasinophytes'],data.loc[ii]['Haptophytes'],data.loc[ii]['Dictyochophytes'],data.loc[ii]['Dinoflagellates'], data.loc[ii]['Raphidophytes'],np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T b=data.loc[ii]['mod_flagellates'] m=np.linalg.lstsq(A,b,rcond=None)[0] # In[30]: m # In[31]: clist=('Diatoms-1','Diatoms-2','Cyanobacteria','Cryptophytes','Prasinophytes','Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes','ones') for c, mm in zip(clist,m): print(c,mm) # In[32]: ii=(~pd.isnull(data['mod_flagellates']))&(~pd.isnull(data['Diatoms-1']))&(~pd.isnull(data['Cyanobacteria']))&\ (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Prasinophytes']))&(~pd.isnull(data['Haptophytes']))&\ (~pd.isnull(data['Dictyochophytes']))&(~pd.isnull(data['Dinoflagellates']))&(~pd.isnull(data['Raphidophytes'])) A=np.vstack([data.loc[ii]['Diatoms-1'],data.loc[ii]['Cyanobacteria'],data.loc[ii]['Cryptophytes'], data.loc[ii]['Prasinophytes'],data.loc[ii]['Haptophytes'],data.loc[ii]['Dictyochophytes'], data.loc[ii]['Dinoflagellates'],np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T b=data.loc[ii]['mod_flagellates'] m=np.linalg.lstsq(A,b,rcond=None)[0] # In[33]: clist=('Diatoms-1','Cyanobacteria','Cryptophytes','Prasinophytes','Haptophytes', 'Dictyochophytes','Dinoflagellates','ones') for c, mm in zip(clist,m): print(c,mm) # #### look at Cryptophytes, Prasinophytes, and Haptophytes (+cyanobacteria), the groups that showed a relationship to flagellates in 201812 # In[34]: ii=(~pd.isnull(data['mod_flagellates']))&(~pd.isnull(data['Cyanobacteria']))&\ (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Prasinophytes']))&(~pd.isnull(data['Haptophytes']))&\ (~pd.isnull(data['Dictyochophytes']))&(~pd.isnull(data['Raphidophytes'])) A=np.vstack([data.loc[ii]['Cyanobacteria'],data.loc[ii]['Cryptophytes'], data.loc[ii]['Prasinophytes'],data.loc[ii]['Haptophytes'],data.loc[ii]['Dictyochophytes'], np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T b=data.loc[ii]['mod_flagellates'] m=np.linalg.lstsq(A,b,rcond=None)[0] # In[35]: clist=('Cyanobacteria','Cryptophytes','Prasinophytes','Haptophytes', 'Dictyochophytes','ones') for c, mm in zip(clist,m): print(c,mm) # #### M. rubrum # In[36]: ii=(~pd.isnull(data['mod_ciliates']))&(~pd.isnull(data['Diatoms-1']))&(~pd.isnull(data['Diatoms-2']))&(~pd.isnull(data['Cyanobacteria']))&\ (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Prasinophytes']))&(~pd.isnull(data['Haptophytes']))&\ (~pd.isnull(data['Dictyochophytes']))&(~pd.isnull(data['Dinoflagellates']))&(~pd.isnull(data['Raphidophytes'])) A=np.vstack([data.loc[ii]['Diatoms-1'],data.loc[ii]['Diatoms-2'],data.loc[ii]['Cyanobacteria'],data.loc[ii]['Cryptophytes'], data.loc[ii]['Prasinophytes'],data.loc[ii]['Haptophytes'],data.loc[ii]['Dictyochophytes'],data.loc[ii]['Dinoflagellates'], data.loc[ii]['Raphidophytes'],np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T b=data.loc[ii]['mod_ciliates'] m=np.linalg.lstsq(A,b,rcond=None)[0] # In[37]: clist=('Diatoms-1','Diatoms-2','Cyanobacteria','Cryptophytes','Prasinophytes','Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes','ones') for c, mm in zip(clist,m): print(c,mm) # Diatoms: # - Cryptophytes # - Diatoms-1 # - Raphidophytes # # Flagellates: # - Cyanobacteria # - Cryptophytes # - Prasinophytes # - Haptophytes # - maybe dinoflagellates # - cyanobacteria make up small fraction of abundance; Nina says leave out # # M. rubrum: # - Cyanobacteria # # None: # - Diatoms-2 # - Dictyochophytes # - Dinoflagellates? # # ##### Nina says Do not include cyanos in flagellate-like group: they are not flagellates and their abundance is low # ### Individual phytoplankton groups compared to model groups (1:1 correspondence not expected) # In[38]: fig,ax=plt.subplots(2,5,figsize=(15,7)) fig.subplots_adjust(wspace=.4) ax=ax.flatten() chplc=('Diatoms-1', 'Diatoms-2','Cyanobacteria','Cryptophytes', 'Prasinophytes', 'Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes','TchlA (ug/L)') mvar1=Chl_N*data['mod_diatoms'] mvar2=Chl_N*data['mod_flagellates'] for ii in range(0,len(chplc)): ax[ii].plot(logt(data.loc[:,[chplc[ii]]].values),logt(mvar1),'.',ms=1,color='blue',label='Diatoms') ax[ii].plot(logt(data.loc[:,[chplc[ii]]].values),logt(mvar2),'.',ms=1,color='red',label='Flagellates') ax[ii].set_ylabel('Model Class') ax[ii].set_xlabel(chplc[ii]) ax[ii].set_title('log10[Chl(mg/m3)+.001]') ax[ii].plot((-3,1.5),(-3,1.5),'k-',alpha=.2) ax[ii].set_xlim((-3,1.5)) ax[ii].set_ylim((-3,1.5)) ax[ii].set_aspect(1) ax[0].legend() # ### Separate into grouping for which there is some model-data agreement and plot model v obs # In[39]: fig,ax=plt.subplots(1,3,figsize=(12,3)) fig.subplots_adjust(wspace=.5) #ax[0].plot(logt(data['Diatoms-1']+data['Raphidophytes']+.5*data['Cryptophytes']),logt(data['mod_diatoms']),'r.') ax[0].plot(logt(data['Diatoms-1']+data['Raphidophytes']),logt(data['mod_diatoms']),'k.',ms=2) ax[0].set_ylabel('Model diatoms') ax[0].set_xlabel('Diatoms1+Raphido') ax[0].set_title('log10[Chl(mg/m3)+.001]') ax[0].plot((-3,1.5),(-3,1.5),'k-',alpha=.2) ax[0].set_xlim((-3,1.5)) ax[0].set_ylim((-3,1.5)) ax[1].plot(logt(data['Cryptophytes']+data['Prasinophytes']+data['Haptophytes']),logt(data['mod_flagellates']),'k.',ms=2) ax[1].set_ylabel('Model flagellates') ax[1].set_xlabel('Crypto+Prasino+Hapto') ax[1].set_title('log10[Chl(mg/m3)+.001]') ax[1].plot((-3,1.5),(-3,1.5),'k-',alpha=.2) ax[1].set_xlim((-3,1.5)) ax[1].set_ylim((-3,1.5)) ax[2].plot(logt(data['Cyanobacteria']),logt(data['mod_ciliates']),'k.',ms=2) ax[2].set_ylabel('Model M. rubrum') ax[2].set_xlabel('Cyano') ax[2].set_title('log10[Chl(mg/m3)+.001]') ax[2].plot((-3,1.5),(-3,1.5),'k-',alpha=.2) ax[2].set_xlim((-3,1.5)) ax[2].set_ylim((-3,1.5)) # In[40]: data['mod_diatoms_chl']=Chl_N*data['mod_diatoms'] data['mod_flagellates_chl']=Chl_N*data['mod_flagellates'] data['mod_ciliates_chl']=Chl_N*data['mod_ciliates'] data['mod_TChl']=data['mod_diatoms_chl']+data['mod_flagellates_chl']+data['mod_ciliates_chl'] data['CPH']=data['Cryptophytes']+data['Prasinophytes']+data['Haptophytes'] data['DD']=data['Diatoms-1']+data['Diatoms-2'] dfVars=data.loc[:,['Diatoms-1', 'Diatoms-2','Cyanobacteria','Cryptophytes', 'Prasinophytes', 'Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes','DD','CPH','TchlA (ug/L)', 'mod_diatoms_chl','mod_flagellates_chl','mod_ciliates_chl','mod_TChl']] # ### Variance-Covariance Matrix # In[41]: dfVars.cov() # ##### largest covariances: # Model diatoms and: # - TChlA: 4.185330 # - Diatoms-1+Diatoms-2: 2.953355 # - Diatoms-1: 2.849766 # - Raphidophytes: 1.242441 # # Model flagellates and: # - crypto+hapto+prasino: 0.391340 # - haptophytes: 0.150141 # - cryptophytes: 0.144700 # ### Correlation Coefficient Matrix # In[42]: dfVars.corr() # ##### Strongest correlations: # Model diatoms and: # - Total chla: 0.343112 # - Diatoms-1: 0.329545 # - Diatoms-1+Diatoms-2: 0.329332 # # Model flagellates and: # - crypto+hapto+prasino: 0.377130 # - haptophytes: 0.309359 # - prasinophytes: 0.288055 # - cryptophytes: 0.283430 # - cyanobacteria: 0.271249 (but remember that cyanobacteria abundances are low) # ### Cov matrix with log transformed values: # In[43]: dflog=pd.DataFrame() for el in ['Diatoms-1', 'Diatoms-2','Cyanobacteria','Cryptophytes', 'Prasinophytes', 'Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes','CPH','TchlA (ug/L)', 'mod_diatoms_chl','mod_flagellates_chl','mod_ciliates_chl','mod_TChl']: dflog[el]=logt(data[el]) dflog.cov() # ### Corr Coeff matrix with log transformed values: # In[44]: dflog.corr() # ### Grouped Model-Obs Comparison: # In[45]: thresh=.8 msize=5 fig,ax=plt.subplots(1,2,figsize=(9,4)) m=ax[0].scatter(logt(data['DD']),logt(data['mod_diatoms_chl']), c=data['yd'],s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366) m=ax[1].scatter(logt(data['CPH']),logt(data['mod_flagellates_chl']), c=data['yd'],s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366) ax[0].set_xlim(-3,2) ax[0].set_ylim(-3,2) ax[1].set_xlim(-3,1.2) ax[1].set_ylim(-3,1.2) ax[0].set_xlabel('Diatoms 1 + Diatoms 2') ax[0].set_ylabel('Model Diatoms') ax[0].set_title('log10[Chl (mg/m$^3$)+.001]') ax[1].set_xlabel('crypto+prasino+hapto') ax[1].set_ylabel('Model flagellates') ax[1].set_title('log10[Chl (mg/m$^3$)+.001]') ax[0].plot((-3,2),(-3+thresh,2+thresh),'-',color='grey') ax[0].plot((-3,2),(-3-thresh,2-thresh),'-',color='grey') ax[1].plot((-3,2),(-3+thresh,2+thresh),'-',color='grey') ax[1].plot((-3,2),(-3-thresh,2-thresh),'-',color='grey') ax[0].plot((-3,2),(-3,2),'k-') ax[1].plot((-3,1.2),(-3,1.2),'k-') plt.tight_layout() fig.colorbar(m,ax=ax[1]) #fig.suptitle('New') # ##### look at summer separately (May-Aug) # - Agrees reasonably well for 201905 # In[46]: thresh=.8 msize=5 data2=data.loc[(data['yd']>yd(dt.datetime(2015,5,1)))&(data['yd']=0)&(data['mod_diatoms_chl']>=0) slope, intercept, r_value, p_value, std_err = spst.linregress(data.loc[ii]['DD'], data.loc[ii]['mod_diatoms_chl']) # In[49]: slope, intercept, r_value*r_value # In[50]: ii=(data['CPH']>=0)&(data['mod_flagellates_chl']>=0) slope, intercept, r_value, p_value, std_err = spst.linregress(data.loc[ii]['CPH'], data.loc[ii]['mod_flagellates_chl']) # In[51]: slope, intercept, r_value*r_value # In[52]: ii=(data['DD']>=0)&(data['mod_diatoms_chl']>=0) slope, intercept, r_value, p_value, std_err = spst.linregress(logt(data.loc[ii]['DD']), logt(data.loc[ii]['mod_diatoms_chl'])) # In[53]: slope, intercept, r_value*r_value # ### Check for spatial patterns in over/under-estimation # In[54]: print('Diatoms/DD') fig,ax=plt.subplots(1,4,figsize=(12,4),gridspec_kw={'width_ratios': [1,1,1,.1],'wspace':.5},) for iax in ax[0:3]: iax.contour(navlon,navlat,tmask[0,:,:],levels=[0.5,],colors='gray') iax.set_xlim(-125.3,-122.5) iax.set_ylim(48,50.5) ihi=logt(data['mod_diatoms_chl'])>(logt(data['DD'])+thresh) ilo=logt(data['mod_diatoms_chl'])<(logt(data['DD'])-thresh) idata=data.loc[(data.DD>=0)&ihi] ax[0].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366) ax[0].set_title('High\n log(mod)>log(obs)+'+str(thresh)) idata=data.loc[(data.DD>=0)&(~ihi)&(~ilo)] ax[1].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366) ax[1].set_title('Medium\n log(obs)-'+str(thresh)+'=0)&ilo] m=ax[2].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366) ax[2].set_title('Low\n log(mod)(logt(data['CPH'])+thresh) ilo=logt(data['mod_flagellates_chl'])<(logt(data['CPH'])-thresh) idata=data.loc[(data.CPH>=0)&ihi] ax[0].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366) ax[0].set_title('High\n log(mod)>log(obs)+'+str(thresh)) idata=data.loc[(data.CPH>=0)&(~ihi)&(~ilo)] ax[1].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366) ax[1].set_title('Medium\n log(obs)-'+str(thresh)+'=0)&ilo] m=ax[2].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366) ax[2].set_title('Low\n log(mod)